## Individual training for Charlie

The process for each scenario includes following:
- data preprocessin
- feature extraction (PCA)
- LR
- visualization
- MLP
- hyperparameter tuning
- visualization

**importing libraries**

In [368]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt


In [369]:

scenario_one=pd.read_csv("database/Scenario_1/Scenario_1_C.csv")
scenario_two=pd.read_csv("database/Scenario_2/Scenario_2_C.csv")
scenario_three=pd.read_csv("database/Scenario_3/Scenario_3_C.csv")
scenarios=[scenario_one,scenario_two,scenario_three]

In [370]:
print(scenario_one.columns[-2],scenario_one.columns[-1])

Fermi_energy thermo_prob_norm


**Data preprocessing**

In [393]:
def columnwise_na_elimiation(data):
    """
    The method filter out all columns contains NA value in the set

    Input: pandas dataframe
    Output: non-NA containing pandas dataframe 
    """
    col_na=data.isna().sum(axis=0)
    # print(col_na)
    col_na[col_na!=0]=1
    col_na=col_na.astype(bool)
    # print(col_na)
    col_na=~col_na
    # print(col_na)
    filtered_data=data.loc[:,col_na]
    return filtered_data

nafree_sce1=columnwise_na_elimiation(scenario_one)
nafree_sce2=columnwise_na_elimiation(scenario_two)
nafree_sce3=columnwise_na_elimiation(scenario_three)
nafree_sces=[nafree_sce1,nafree_sce2,nafree_sce3]

data splitting

In [394]:
training_sets=[]
test_sets=[]
for sce in nafree_sces:
    train,test=train_test_split(sce,test_size=0.2)
    trainX=train.iloc[:,:-2]
    trainY=train.iloc[:,-2:]
    testX=test.iloc[:,:-2]
    testY=test.iloc[:,-2:]
    training_sets.append([trainX,trainY])
    test_sets.append([testX,testY])

**Feature Extraction**

In [373]:
from sklearn.decomposition import PCA

In [436]:
pca1=PCA(n_components=5)
pca2=PCA(n_components=5)
pca3=PCA(n_components=5)
pcas=[pca1,pca2,pca3]
for i,sce in enumerate(nafree_sces):
    currentPCA=pcas[i]
    currentTrainX=training_sets[i][0]
    currentTestX=test_sets[i][0]
    currentPCA.fit(currentTrainX)
    training_sets[i][0]=currentPCA.transform(currentTrainX)
    test_sets[i][0]=currentPCA.transform(currentTestX)

**Linear Regression**

In [437]:
import sys
sys.path.append("FederatedLearning-main")
import Model


In [541]:

lr1=None
lr2=None
lr3=None
lrs=[lr1,lr2,lr3]

#Model creation for LR
for i,single_train in enumerate(training_sets):
    currModel=lrs[i]
    currX=single_train[0]
    currY=single_train[1]
    lrs[i]=Model.LinearRegression(currX.shape[1],1)
print(list(lrs[2].parameters()))

[Parameter containing:
tensor([[ 0.2865, -0.0846,  0.2761, -0.1072, -0.3342]], requires_grad=True), Parameter containing:
tensor([-0.3759], requires_grad=True)]


In [553]:
def opt_train(optimizer,scenario_index,loop_num):
    currOpt=optimizer
    currX=training_sets[scenario_index][0]
    currX=torch.tensor(currX,dtype=torch.float32)
    currY=training_sets[scenario_index][1].iloc[:,0]
    currY=torch.tensor(np.array(currY),dtype=torch.float32)
    currY=currY.reshape(currY.shape[0],1)
    for epoch in range(loop_num):
        currOpt.zero_grad()
        predictedResult=lrs[scenario_index](currX)
        loss=mseLoss(predictedResult,currY,lrs[scenario_index])
        loss.backward()
        currOpt.step()
        for name, param in lrs[scenario_index].named_parameters():
            if param.grad is not None:
                print(f"Parameter {name}, Gradient: {param.grad.mean().item()}")

In [521]:
#Model training for LR
import Loss
import torch.optim as optim
import torch
mseLoss=Loss.MSELoss()

optimizer1=optim.SGD(lrs[0].parameters(),lr=0.0008)
optimizer2=optim.SGD(lrs[1].parameters(),lr=0.0000000001)
optimizer3=optim.SGD(lrs[2].parameters(),lr=0.001)
optimizers=[optimizer1,optimizer2,optimizer3]

opt_train(optimizer1,0,1000)
mseLoss=Loss.MSELoss()

opt_train(optimizer2,1,1000)
mseLoss=Loss.RidgeLoss(0.5)
opt_train(optimizer3,2,1000)


In [554]:
mseLoss=Loss.MSELoss()
optimizer3=optim.SGD(lrs[2].parameters(),lr=0.0000000001)
opt_train(optimizer3,2,15)

curr=lrs[2]
print(list(curr.parameters()))
testX=torch.tensor(test_sets[2][0],dtype=torch.float32)
print(testX.shape)
print(torch.isnan(curr(testX)).sum().item())
print(mean_squared_error(test_sets[2][1].iloc[:,0],(curr(testX)).detach().numpy()))
print(r2_score(test_sets[2][1].iloc[:,0],(curr(testX)).detach().numpy()))
print("\n")

Parameter linear.weight, Gradient: -9.449885604879182e+34
Parameter linear.bias, Gradient: 0.0
Parameter linear.weight, Gradient: 5.552821046788674e+35
Parameter linear.bias, Gradient: 3.541774862152234e+21
Parameter linear.weight, Gradient: -3.2628775021863526e+36
Parameter linear.bias, Gradient: -9.44473296573929e+22
Parameter linear.weight, Gradient: 1.9172903486404314e+37
Parameter linear.bias, Gradient: 4.911261142184431e+23
Parameter linear.weight, Gradient: -inf
Parameter linear.bias, Gradient: 0.0
Parameter linear.weight, Gradient: nan
Parameter linear.bias, Gradient: nan
Parameter linear.weight, Gradient: nan
Parameter linear.bias, Gradient: nan
Parameter linear.weight, Gradient: nan
Parameter linear.bias, Gradient: nan
Parameter linear.weight, Gradient: nan
Parameter linear.bias, Gradient: nan
Parameter linear.weight, Gradient: nan
Parameter linear.bias, Gradient: nan
Parameter linear.weight, Gradient: nan
Parameter linear.bias, Gradient: nan
Parameter linear.weight, Gradient

ValueError: Input contains NaN.

In [519]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

for i in range(3):
    curr=lrs[i]
    testX=torch.tensor(test_sets[i][0],dtype=torch.float32)
    print(mean_squared_error(test_sets[i][1].iloc[:,0],(curr(testX)).detach().numpy()))
    print(r2_score(test_sets[i][1].iloc[:,0],(curr(testX)).detach().numpy()))
    print("\n")

1.7204885159335979
-0.9905201925220448


101.80103432924795
-81.68384714417327




ValueError: Input contains NaN.