In [None]:
# Elias Zakarrya

# Task 1: Stochastic Gradient Descent

import pandas as pd
import torch
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from google.colab import files

import io

# File name under which the dataset is expected.
dataset_filename = 'gym_members_exercise_tracking.csv'

# files.upload() returns a dict mapping each uploaded file name to its bytes.
uploaded_file = files.upload()

if dataset_filename in uploaded_file:
    # Parse the bytes that were just uploaded. Colab stores a repeated upload
    # under a new name (e.g. "name (1).csv"), so reading the fixed path could
    # silently load an older copy of the file.
    dataset = pd.read_csv(io.BytesIO(uploaded_file[dataset_filename]))
else:
    # Nothing with the expected name was uploaded; use the copy on disk.
    dataset = pd.read_csv(dataset_filename)
print(dataset.head())

# Three input columns and the regression target.
features = dataset[['Weight (kg)', 'Session_Duration (hours)', 'Workout_Frequency (days/week)']].values
target = dataset['Calories_Burned'].values

# Standardise the inputs (zero mean, unit variance per column).
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

features_tensor = torch.tensor(features_scaled, dtype=torch.float32)
# Column vector so it lines up with the (N, 1) predictions.
target_tensor = torch.tensor(target, dtype=torch.float32).view(-1, 1)
# Prepend a column of ones so weights[0] acts as the intercept.
features_bias = torch.cat([torch.ones(features_tensor.shape[0], 1), features_tensor], dim=1)

# Hyperparameters.
lr = 0.01
reg_param = 0.1
epochs = 100
batch_size = 1

# One weight per column of features_bias (intercept first), starting at zero.
weights = torch.zeros(features_bias.shape[1], 1, requires_grad=True)

def calculate_loss(weights, features_bias, target, reg_param):
    """Return the mean squared error plus an L2 penalty on the weights.

    Args:
        weights: Column vector of parameters; row 0 is the intercept.
        features_bias: Design matrix whose first column is the bias column.
        target: Targets with the same shape as ``features_bias @ weights``.
        reg_param: Multiplier applied to the squared L2 norm of the
            non-intercept weights.

    Returns:
        Scalar tensor holding the regularised loss.
    """
    residuals = features_bias @ weights - target
    data_term = torch.mean(residuals ** 2)
    # Sum of squares equals ||w||^2 but avoids taking a square root and then
    # squaring it again, so the gradient does not pass through the root at
    # the all-zero starting point. The intercept (row 0) is not penalised.
    penalty = reg_param * torch.sum(weights[1:] ** 2)
    return data_term + penalty

# Mean training loss of each epoch, used for the plot below.
loss_values = []

num_samples = len(features_bias)
if num_samples == 0:
    raise ValueError("Cannot train on an empty dataset.")

for epoch in range(epochs):
    epoch_loss_sum = 0.0

    # Step by batch_size so consecutive mini-batches do not overlap when
    # batch_size is larger than 1.
    for start in range(0, num_samples, batch_size):
        features_batch = features_bias[start:start + batch_size]
        target_batch = target_tensor[start:start + batch_size]

        loss = calculate_loss(weights, features_batch, target_batch, reg_param)
        loss.backward()

        # Manual gradient step; no_grad keeps the update out of the graph.
        with torch.no_grad():
            weights -= lr * weights.grad
            weights.grad.zero_()

        # Weight by the batch length so a shorter final batch is not
        # over-represented in the epoch average.
        epoch_loss_sum += loss.item() * len(features_batch)

    # Record the average over the whole epoch instead of the loss of the
    # last mini-batch only, which is dominated by a single sample.
    loss_values.append(epoch_loss_sum / num_samples)
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss_values[-1]}")

plt.plot(loss_values)
plt.title('Loss vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid()
plt.show()

print(f"Trained parameters: {weights.flatten()}")
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Task 1: Data Preprocessing

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import zipfile
from google.colab import files
import os

files.upload()

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle competitions download -c avazu-ctr-prediction

with zipfile.ZipFile('avazu-ctr-prediction.zip', 'r') as zf:
    zf.extractall('avazu-ctr-prediction')

data_file_path = 'avazu-ctr-prediction/train.gz'
if os.path.exists(data_file_path):
    print("Loading training dataset...")
    df_train = pd.read_csv(data_file_path, compression='gzip', nrows=10000)
else:
    print(f"File not found: {data_file_path}")

if 'df_train' in locals():
    df_train.drop(columns=['id'], inplace=True)
    df_train.fillna(0, inplace=True)
    df_train = pd.get_dummies(df_train, columns=['banner_pos', 'site_id', 'app_id', 'device_id', 'device_ip'], drop_first=True)

    features = df_train.drop(columns=['click'])
    target = df_train['click']

    features = features.apply(pd.to_numeric, errors='coerce').fillna(0)

    if not features.select_dtypes(include=['object']).empty:
        raise ValueError("There are still non-numeric columns in features.")

    feature_tensor = torch.tensor(features.values, dtype=torch.float64)
    target_tensor = torch.tensor(target.values, dtype=torch.float64).view(-1, 1)

test_file_path = 'avazu-ctr-prediction/test.gz'
if os.path.exists(test_file_path):
    print("Loading test dataset...")
    df_test = pd.read_csv(test_file_path, compression='gzip')
else:
    print(f"File not found: {test_file_path}")

if 'df_test' in locals():
    df_test.drop(columns=['id'], inplace=True)
    df_test.fillna(0, inplace=True)
    df_test = pd.get_dummies(df_test, columns=['banner_pos', 'site_id', 'app_id', 'device_id', 'device_ip'], drop_first=True)

    for col in features.columns:
        if col not in df_test.columns:
            df_test[col] = 0

    df_test = df_test.reindex(columns=features.columns, fill_value=0)
    test_tensor = torch.tensor(df_test.values, dtype=torch.float32)

class LogisticRegressionModel(nn.Module):
    """Logistic regression: one linear unit followed by a sigmoid."""

    def __init__(self, size):
        """Build the model for inputs with ``size`` features."""
        super().__init__()
        self.layer = nn.Linear(size, 1)

    def forward(self, x):
        """Return the sigmoid of the linear score for each row of ``x``."""
        score = self.layer(x)
        return score.sigmoid()

# Stays None unless both training and test tensors were available.
pred_values = None

if 'feature_tensor' in locals():
    # nn.Linear parameters are float32 by default; cast so that inputs
    # created with another dtype do not raise a dtype mismatch.
    train_inputs = feature_tensor.to(torch.float32)
    train_labels = target_tensor.to(torch.float32)

    input_dim = train_inputs.shape[1]
    lr_model = LogisticRegressionModel(input_dim)
    loss_fn = nn.BCELoss()
    optimizer = optim.SGD(lr_model.parameters(), lr=0.01)

    # Full-batch gradient descent: one optimizer step per epoch.
    epochs = 100
    lr_model.train()
    for ep in range(epochs):
        optimizer.zero_grad()
        predictions = lr_model(train_inputs)
        loss = loss_fn(predictions, train_labels)
        loss.backward()
        optimizer.step()

        if (ep + 1) % 10 == 0:
            print(f'Epoch [{ep + 1}/{epochs}], Loss: {loss.item():.4f}')

    # Predict only when a model was actually trained.
    if 'test_tensor' in locals():
        lr_model.eval()
        with torch.no_grad():
            pred_outputs = lr_model(test_tensor.to(torch.float32))
            pred_values = pred_outputs.numpy()

if pred_values is not None:
    # The 'id' column was dropped from df_test during preprocessing, so
    # df_test.index only holds row positions. Re-read the identifiers from
    # the test file, as strings so large values are written back unchanged.
    test_ids = pd.read_csv(
        test_file_path, compression='gzip', usecols=['id'], dtype={'id': str}
    )['id'].to_numpy()

    submission_df = pd.DataFrame({
        'id': test_ids,
        'click': pred_values.flatten()
    })

    submission_df.to_csv('submission.csv', index=False)

    print(submission_df.head())
else:
    print("No predictions were produced; submission.csv was not written.")
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Task 3: Task 2: Logistic Regression
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
import zipfile
from google.colab import files
import os

files.upload()
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c avazu-ctr-prediction

with zipfile.ZipFile('avazu-ctr-prediction.zip', 'r') as zip_ref:
    zip_ref.extractall('avazu-ctr-prediction')

train_file_path = 'avazu-ctr-prediction/train.gz'
test_file_path = 'avazu-ctr-prediction/test.gz'

train_data = pd.read_csv(train_file_path, compression='gzip')
test_data = pd.read_csv(test_file_path, compression='gzip')

train_data.drop(columns=['id'], inplace=True)
train_data.fillna(0, inplace=True)
train_data = pd.get_dummies(train_data, columns=['banner_pos', 'site_id', 'app_id', 'device_id', 'device_ip'], drop_first=True)

X_train = train_data.drop(columns=['click'])
y_train = train_data['click']

X_train = X_train.apply(pd.to_numeric, errors='coerce').fillna(0)

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

class LogisticRegressionModel(nn.Module):
    """Logistic regression: one linear unit followed by a sigmoid."""

    def __init__(self, input_size):
        """Build the model for inputs with ``input_size`` features."""
        super().__init__()
        self.linear = nn.Linear(input_size, 1)

    def forward(self, x):
        """Return the sigmoid of the linear score for each row of ``x``."""
        score = self.linear(x)
        return score.sigmoid()

# Hold out part of the labelled rows for evaluation: the precision-recall
# curve needs true labels, and test_data carries no 'click' column here.
holdout_fraction = 0.2
num_rows = X_train_tensor.shape[0]
num_holdout = max(1, int(holdout_fraction * num_rows))
# A dedicated generator keeps the split reproducible without touching the
# global random state used for the model initialisation.
split_rng = torch.Generator().manual_seed(0)
shuffled = torch.randperm(num_rows, generator=split_rng)
holdout_idx, fit_idx = shuffled[:num_holdout], shuffled[num_holdout:]

X_fit, y_fit = X_train_tensor[fit_idx], y_train_tensor[fit_idx]
X_val, y_val = X_train_tensor[holdout_idx], y_train_tensor[holdout_idx]

model = LogisticRegressionModel(X_train_tensor.shape[1])
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Full-batch gradient descent on the fitting split: one step per epoch.
epochs = 100
model.train()
for epoch in range(epochs):
    optimizer.zero_grad()

    outputs = model(X_fit)
    loss = criterion(outputs, y_fit)

    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

test_data.drop(columns=['id'], inplace=True)
test_data.fillna(0, inplace=True)
test_data = pd.get_dummies(test_data, columns=['banner_pos', 'site_id', 'app_id', 'device_id', 'device_ip'], drop_first=True)

# Keep exactly the training columns in training order; columns missing from
# the test frame are filled with 0.
test_data = test_data.reindex(columns=X_train.columns, fill_value=0)
# Same numeric coercion as for the training features; the untouched text
# columns cannot be converted to a tensor otherwise.
test_data = test_data.apply(pd.to_numeric, errors='coerce').fillna(0)
X_test_tensor = torch.tensor(test_data.to_numpy(dtype='float32'))

model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_predictions = test_outputs.numpy()
    val_scores = model(X_val).numpy().ravel()

# Score the held-out labelled rows; the test predictions above have no
# labels to compare against.
precision, recall, _ = precision_recall_curve(y_val.numpy().ravel(), val_scores)

plt.figure()
plt.plot(recall, precision, marker='.')
plt.title('Precision-Recall Curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.grid()
plt.show()







Saving kaggle.json to kaggle (20).json
avazu-ctr-prediction.zip: Skipping, found more recently modified local copy (use --force to force download)
