In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from google.colab import files
uploaded = files.upload()
filename = list(uploaded.keys())[0]

# -----------------------------
# Load data
# -----------------------------
df = pd.read_csv(filename)

# -----------------------------
# Find and parse date column
# -----------------------------
print("Columns:", df.columns)

# CHANGE THIS if column name is different
df["date"] = pd.to_datetime(df["date"])

# -----------------------------
# Create Season column
# -----------------------------
df["Season"] = df["date"].dt.month.apply(
    lambda x: "Summer" if x in [7, 8] else "School"
)

# -----------------------------
# Select numeric features
# -----------------------------
numeric_cols = df.select_dtypes(include=[np.number]).columns
print("Numeric columns used:", numeric_cols)

X = df[numeric_cols]
y = df["Season"]

# -----------------------------
# Train-test split
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# -----------------------------
# Scaling
# -----------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# -----------------------------
# Logistic Regression
# -----------------------------
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# -----------------------------
# Evaluation
# -----------------------------
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Saving akbank_clean_categorized_v3.csv to akbank_clean_categorized_v3.csv
Columns: Index(['date', 'amount', 'balance', 'category'], dtype='object')
Numeric columns used: Index(['amount', 'balance'], dtype='object')
Accuracy: 0.868421052631579

Classification Report:
               precision    recall  f1-score   support

      School       0.87      1.00      0.93       363
      Summer       0.00      0.00      0.00        55

    accuracy                           0.87       418
   macro avg       0.43      0.50      0.46       418
weighted avg       0.75      0.87      0.81       418


Confusion Matrix:
 [[363   0]
 [ 55   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
