In [68]:
# importing Libraries
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import f1_score
from collections import Counter
from torch.utils.data import DataLoader, WeightedRandomSampler

Reading the data

In [69]:
df=pd.read_csv('train.csv')

In [70]:
df

Unnamed: 0,labels,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,...,f_1190,f_1191,f_1192,f_1193,f_1194,f_1195,f_1196,f_1197,f_1198,f_1199
0,0,-2.033875,0.978446,-0.142131,-0.177117,-1.470684,1.669562,-0.196530,-0.125239,-0.452284,...,-1.111266,0.716084,0.060039,0.301279,-1.174846,-1.076498,-0.069452,-0.604012,-2.179176,0.558003
1,1,-0.348835,0.294815,-0.557577,-2.020773,-1.234715,1.633930,-1.680658,-0.358146,0.166122,...,0.735240,0.829781,1.521941,1.347946,0.754505,1.330642,-0.754453,0.582956,0.252671,1.495870
2,1,0.113248,-0.607726,-0.947791,0.830851,0.998291,0.498321,-1.493958,0.789572,-1.311018,...,0.104698,0.616189,-1.035953,2.111387,-0.984415,1.148076,-1.433554,0.243372,0.170083,1.274795
3,0,1.223321,-0.479048,-1.925789,1.680377,0.021840,-1.453307,0.605559,-0.019024,1.065448,...,0.360237,-1.957863,-0.123384,1.505329,0.660290,-1.769443,-0.547756,-0.568122,0.244645,0.982116
4,0,0.160109,0.422684,-0.308029,0.227744,0.432854,0.608348,0.193832,1.035091,-0.538868,...,0.416629,1.441766,0.212572,-0.994721,1.143999,-2.166923,-1.199248,-1.028636,0.752791,0.317169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5245,0,1.157565,-0.142219,1.043992,1.144946,1.195423,0.248978,-1.505100,-0.874137,-1.782724,...,1.195423,-0.255793,-0.154838,0.413029,-0.482939,-1.277953,-0.445082,1.195423,-0.924614,-0.432462
5246,0,1.424709,0.235910,1.356778,1.368099,-0.318862,1.039765,-0.986854,-0.330184,-1.383120,...,1.424709,-1.066107,0.881258,-0.488691,-1.281223,-1.213291,0.122692,1.175627,-1.145360,0.451026
5247,1,-0.375687,1.524455,0.012514,-0.007917,0.073809,-0.906909,-1.254247,1.606182,0.298557,...,-0.028349,-0.968204,-1.233815,1.626613,-0.191802,1.115823,0.380284,-0.293960,0.135104,1.381434
5248,1,-0.478238,1.666142,0.049609,-0.428752,-0.362771,1.798104,-0.214314,0.775400,-0.379267,...,-0.428752,-1.121552,-0.379267,-0.593705,0.049609,1.765114,0.313533,-0.329781,-1.220524,0.033114


Each row has already been flattend and processed hence only to make batches and train a neural network.

In [71]:
labels = df.iloc[:, 0]
features = df.iloc[:, 1:]

# Creting tensors by conveting to numpy array and then to tensors.
labels = labels.to_numpy()
features = features.to_numpy()

# Convert numpy arrays to PyTorch tensors.
labels = torch.from_numpy(labels).long()
features = torch.from_numpy(features).float()

In [72]:
# Split the data into training and testing sets
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.01, random_state=4232)

# Using Datasets 
train_dataset = TensorDataset(features_train, labels_train)
train_loader = DataLoader(train_dataset, batch_size=160, shuffle=True)

In [64]:
# Get the class labels from the train loader dataset
class_labels = [label for _, label in train_loader.dataset]

# Calculate the class counts
class_counts = torch.bincount(torch.tensor(class_labels))
total_samples = class_counts.sum()

# Convert class counts to real numbers
class_counts = class_counts.float()

# Calculate class weights
class_weights = total_samples / (len(class_counts) * class_counts)

# Create a list of class weights for each training sample
sample_weights = [class_weights[label] for _, label in train_loader.dataset]

# Create a WeightedRandomSampler with the sample weights
sampler = WeightedRandomSampler(sample_weights, num_samples=len(train_loader.dataset), replacement=True)

# Update the train loader with the weighted sampler
train_loader = DataLoader(train_loader.dataset, batch_size=train_loader.batch_size, sampler=sampler, shuffle=False, num_workers=train_loader.num_workers, pin_memory=train_loader.pin_memory)

# Rest of your training code using the updated train loader

### Neural Network

In [65]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NeuralNetwork, self).__init__()
        self.dropout = nn.Dropout(0.05)
        self.fc1 = nn.Linear(input_size, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512,320)
        self.fc4 = nn.Linear(320,128)
        self.fc5 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.dropout(x)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = self.fc5(x)
        return x

# Set up the model and optimizer
input_size = features.shape[1]
num_classes = torch.unique(labels).shape[0]
model = NeuralNetwork(input_size, num_classes)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [66]:
num_epochs = 300
for epoch in range(num_epochs):
    model.train()
    for batch_features, batch_labels in train_loader:
        optimizer.zero_grad()
        # model = nn.Dropout(p=0.2)
        outputs = model(batch_features)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        outputs = model(features_test)
        _, predicted = torch.max(outputs, 1)
        f1 = f1_score(labels_test,predicted)
        accuracy = (predicted == labels_test).sum().item() / labels_test.size(0)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}",f1)

Epoch 1/300, Loss: 0.2099, Accuracy: 0.7736 0.7
Epoch 2/300, Loss: 0.1933, Accuracy: 0.8113 0.7368421052631577
Epoch 3/300, Loss: 0.1194, Accuracy: 0.8113 0.7222222222222222
Epoch 4/300, Loss: 0.0515, Accuracy: 0.9057 0.8571428571428571
Epoch 5/300, Loss: 0.0209, Accuracy: 0.8868 0.8125
Epoch 6/300, Loss: 0.0043, Accuracy: 0.7925 0.6857142857142857
Epoch 7/300, Loss: 0.0044, Accuracy: 0.7925 0.7027027027027027
Epoch 8/300, Loss: 0.0405, Accuracy: 0.7736 0.6666666666666667
Epoch 9/300, Loss: 0.0283, Accuracy: 0.8113 0.7058823529411765
Epoch 10/300, Loss: 0.0563, Accuracy: 0.7358 0.6111111111111113
Epoch 11/300, Loss: 0.0137, Accuracy: 0.8113 0.75
Epoch 12/300, Loss: 0.0123, Accuracy: 0.7358 0.6111111111111113
Epoch 13/300, Loss: 0.0374, Accuracy: 0.7547 0.5806451612903226
Epoch 14/300, Loss: 0.0236, Accuracy: 0.8113 0.7058823529411765
Epoch 15/300, Loss: 0.0022, Accuracy: 0.7925 0.6857142857142857
Epoch 16/300, Loss: 0.0004, Accuracy: 0.7925 0.6857142857142857
Epoch 17/300, Loss: 0.0058

## Working with test data

In [27]:
test_data = pd.read_csv('test.csv')

In [28]:
test_data

Unnamed: 0,id,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,...,f_1190,f_1191,f_1192,f_1193,f_1194,f_1195,f_1196,f_1197,f_1198,f_1199
0,1,-3.388242,0.868285,-0.427619,-0.678964,-1.625735,0.262761,1.243040,1.537751,-0.352028,...,-0.776403,-0.662884,-0.257091,-1.168413,0.223260,-0.482520,-0.085453,-0.382265,-0.539349,-1.682404
1,2,-0.496920,0.952381,0.989040,0.451422,0.513516,-0.099658,-1.124326,0.729430,-0.216224,...,0.379635,-1.760084,1.125450,-0.328047,-0.880305,-1.257607,0.964312,2.021104,0.655021,-0.423029
2,3,1.128369,-0.537951,2.544358,1.165254,-1.904994,0.776961,-0.495768,0.060111,-1.418468,...,1.165254,-1.373589,-0.483701,-0.964782,-0.869555,0.066040,-0.444567,-0.531935,-0.878660,1.099488
3,4,0.051253,1.746814,0.681177,1.844524,-0.327977,1.226839,-0.085519,0.379008,-1.003667,...,-0.442288,-2.794472,-0.763468,-0.789832,-0.113209,-2.703150,-2.058728,1.070627,-0.458045,-0.435825
4,5,1.423209,-0.983594,-1.694170,1.197507,1.044211,0.518777,-0.298612,-0.365174,0.738447,...,-2.624450,-3.200223,0.711422,-0.190394,0.337224,-1.656639,0.707360,-0.562290,1.471181,-0.192000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2245,2246,0.889888,-0.319077,0.849589,0.822723,0.876455,0.325704,0.876455,-0.910127,0.889888,...,0.889888,-2.226556,-0.090717,-1.393713,-0.896694,-0.399675,-0.856395,0.876455,0.863022,-0.601169
2246,2247,1.005737,-0.064755,1.163494,1.163494,1.163494,0.724028,0.712760,-0.785929,-1.225394,...,1.163494,-1.270468,-0.932417,-1.169053,-0.008414,-0.605636,-0.323927,1.163494,-1.315541,0.047928
2247,2248,1.252086,1.223561,0.153859,-0.987156,0.239435,-0.003031,-1.158309,1.237823,-1.272410,...,0.581740,-1.386512,0.809943,-1.243885,0.153859,-0.630589,1.594391,1.252086,-1.429300,1.408976
2248,2249,1.042624,-0.129166,1.066538,1.030667,1.162195,0.707827,-1.396612,0.014319,-1.025944,...,1.078495,-1.193343,0.086061,-0.081338,-0.978116,-0.368307,-0.129166,1.090452,-1.444440,0.468686


In [29]:
# Extract features from the test data
test_features = test_data.iloc[:, 1:].values

# labels = labels.to_numpy()
# test_features = test_features.to_numpy()

# Convert the features to PyTorch tensor
test_features = torch.from_numpy(test_features).float()

# Create a PyTorch data loader for the test data
test_dataset = TensorDataset(test_features)
test_loader = DataLoader(test_dataset, batch_size=32)

In [30]:
model.eval()
predictions = []
with torch.no_grad():
    for batch_features in test_loader:
        outputs = model(batch_features[0])
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.tolist())

# Create a DataFrame with the index numbers and predictions
results = pd.DataFrame({'Index': test_data.index, 'Prediction': predictions})

# Save the results to a CSV file
results.to_csv('results.csv', index=False)

In [32]:
predictions

[0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
