In [1]:
import numpy as np
import pandas as pd
import csv
import pickle
from transposeToDummy import transposeToDummy
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from tqdm import tqdm

In [2]:
# program parameters
feature_size = 23
random_seed = 42
learning_rate = 0.01
epochs = 10000

In [3]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# Data initialization
sigma=np.array([[1,0],[0,1]])
beta_mu=np.array([[1,0],[0,1]])

In [6]:
# Linear combination function
def linear_combination(a):
    global sigma, beta_mu
    numerator=a.T@beta_mu@a
    denominator=a.T@sigma@a

    # We have minimzer. So, we need to return negative value
    return -numerator/denominator
    

In [7]:
# Load data
path='Data/'
df=pd.read_csv(path+'eq2015_modify_category.csv')

In [8]:
# reduce the size of the data
df=df.sample(frac=0.01, random_state=random_seed)

In [9]:
# Split the dataframe into training and testing sets
df, test_df = train_test_split(df, test_size=0.2, random_state=random_seed)

# Print the shapes of the resulting dataframes
print("Training set shape:", df.shape)
print("Testing set shape:", test_df.shape)

Training set shape: (6048, 21)
Testing set shape: (1512, 21)


In [10]:
# Split by label
grouped_data = df.groupby('damage_level')

# Initialize variables
S_ms=[]
mu_ms=[]
total_ms=0
msdf=df.drop(columns=['damage_level'])
msdf=transposeToDummy(msdf,toNumpy=False)
total_mean=msdf.mean()


for label, data in grouped_data:
    # drop damage_level column
    data=data.drop(columns=['damage_level'])
    
    # transpose to dummy
    data=transposeToDummy(data,toNumpy=False)
    
    # calculate S_m
    S_ms.append((data.shape[0]-1)*data.cov())
    total_ms+=data.shape[0]-1

    # calculate mu_m
    mean_ms=data.mean()-total_mean
    mean_ms=mean_ms.to_frame()
    mu_ms.append((data.shape[0])*(mean_ms @ mean_ms.T))
    
# calculate sigma
sigma_ms=sum(S_ms)/total_ms

# calculate beta_mu
beta_mu_ms=sum(mu_ms)/(total_ms+len(grouped_data))


In [11]:
# Convert to tensor
sigma=torch.tensor(sigma_ms.values)
beta_mu=torch.tensor(beta_mu_ms.values)

In [12]:
# move data to device
sigma=sigma.to(device)
beta_mu=beta_mu.to(device)
a=a.to(device)

In [13]:
# Create a new tensor that requires gradients from the original tensor a
a_optim = torch.tensor(a, requires_grad=True)

# Update the optimizer to use the new tensor
optimizer = torch.optim.SGD([a_optim], lr=learning_rate)


  a_optim = torch.tensor(a, requires_grad=True)


In [22]:
# Training
previous_a=a.clone()
previous_a=previous_a.cpu().detach().numpy()
for epoch in tqdm(range(epochs)):
    # Forward pass
    output = linear_combination(a)
    
    # Loss
    loss = output
    
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if (epoch+1) % 1000 == 0:
        print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, loss.item()))
        ms_a=a.cpu().detach().numpy()
        if np.array_equal(previous_a, ms_a):
            print('a did not change')

 11%|█         | 1066/10000 [00:02<00:19, 454.41it/s]

Epoch [1000/10000], Loss: 0.0017
a did not change


 21%|██        | 2107/10000 [00:04<00:12, 626.40it/s]

Epoch [2000/10000], Loss: 0.0017
a did not change


 31%|███       | 3105/10000 [00:05<00:10, 650.48it/s]

Epoch [3000/10000], Loss: 0.0017
a did not change


 41%|████      | 4052/10000 [00:07<00:09, 617.91it/s]

Epoch [4000/10000], Loss: 0.0017
a did not change


 51%|█████     | 5095/10000 [00:09<00:07, 647.28it/s]

Epoch [5000/10000], Loss: 0.0017
a did not change


 61%|██████    | 6100/10000 [00:10<00:06, 586.86it/s]

Epoch [6000/10000], Loss: 0.0017
a did not change


 71%|███████   | 7105/10000 [00:12<00:04, 677.46it/s]

Epoch [7000/10000], Loss: 0.0017
a did not change


 81%|████████  | 8093/10000 [00:13<00:02, 666.36it/s]

Epoch [8000/10000], Loss: 0.0017
a did not change


 91%|█████████ | 9070/10000 [00:15<00:01, 580.26it/s]

Epoch [9000/10000], Loss: 0.0017
a did not change


100%|██████████| 10000/10000 [00:17<00:00, 588.17it/s]

Epoch [10000/10000], Loss: 0.0017
a did not change





In [None]:
# Maximize the vale
result=minimize(linear_combination,a)

In [None]:
print(result.x)

In [None]:
# Get beta_mu
column_means = np.mean(features, axis=0)
print(column_means)

NameError: name 'features' is not defined

In [None]:
# Get sigma