# Load & transform the data
# Implement logistic regression
# Train & predict
# 

In [9]:
# task 1
import pandas as pd

df = pd.read_csv('https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/rk7VDaPjMp1h5VXS-cUyMg/league-of-legends-data-large.csv')
target = "win"
features = df.columns.drop(target)
X = df[features]
y = df[target]
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# standard scaler
# convert to tensors

In [39]:
# tasks 2, 3, 4 (implementation, not usage)
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.base import BaseEstimator, ClassifierMixin
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

class LogisticRegression(nn.Module, BaseEstimator, ClassifierMixin):

    def __init__(self, n_features=8, epochs=1000,
                 learning_rate=0.01, weight_decay=0):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(n_features, 1)
        self.sigmoid = nn.Sigmoid()
        self.n_features = n_features
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.epochs = epochs

    def forward(self, x):
        y_pred = self.linear(x)
        y_pred = self.sigmoid(y_pred)
        return y_pred

    def fit(self, X, y):
        X = torch.tensor(X, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32).view(-1, 1)        
        criterion = nn.BCELoss()
        optimizer = optim.SGD(self.parameters(),
                              lr=self.learning_rate, weight_decay=self.weight_decay)
        for epoch in range(self.epochs):
            optimizer.zero_grad()
            y_pred = self.forward(X)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            if epoch+1 % 100 == 0:
                print(f'Epoch {epoch+1}/{self.epochs}, BCE Loss: {loss.item()}')
        return self

    def predict(self, X):
        X = torch.tensor(X, dtype=torch.float32)
        y_pred = self.forward(X)
        return y_pred.detach().numpy()
    
    # task 6
    def save(self, path):
        torch.save(self.state_dict(), path)
    
    def load(self, path):
        self.load_state_dict(torch.load(path))
        return self
    
    # task 8
    def plot_feature_importance(self, feature_names):

        coefficients = self.linear.weight.detach().numpy().flatten()        
        importance_df = pd.DataFrame({
            'Feature': feature_names,
            'Coefficient': coefficients
        })
        importance_df['AbsCoef'] = abs(importance_df['Coefficient'])
        importance_df = importance_df.sort_values('AbsCoef', ascending=True)
        
        plt.figure(figsize=(10, 6))
        sns.barplot(data=importance_df, x='Coefficient', y='Feature')
        plt.title('Feature Importance')
        plt.xlabel('Coefficient Value')
        plt.tight_layout()
        plt.show()


In [11]:
# task 5
# plot confusion matrix
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

def plot_confusion_matrix(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

# print classification report
from sklearn.metrics import classification_report

def print_classification_report(y_true, y_pred):
    print(classification_report(y_true, y_pred))

# plot ROC curve, calculate AUC
from sklearn.metrics import roc_curve, roc_auc_score
import numpy as np

def plot_roc_curve(y_true, y_pred):
    # Calculate ROC curve
    auc = roc_auc_score(y_true, y_pred)
    fpr, tpr, _ = roc_curve(y_true, y_pred)

    sns.set_style("whitegrid")
    plt.figure(figsize=(8, 6))
    sns.lineplot(x=fpr, y=tpr, linewidth=2)
    plt.plot([0, 1], [0, 1], 'k--', alpha=0.5)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve (AUC = {auc:.3f})')
    plt.show()

In [43]:
# task 7
# hyperparameter tuning
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LogisticRegression())
])

param_grid = {
    #'model__epochs': [1000, 2000],
    'model__learning_rate': [0.01, 0.05, 0.1],
    #'model__weight_decay': [0.01, 0.1]
}

grid = GridSearchCV(pipe, param_grid, cv=5, n_jobs=5, error_score='raise')
grid.fit(X_train, y_train)
grid.best_params_

ValueError: could not determine the shape of object type 'Series'