In [346]:
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import sys
sys.path.append("FederatedLearning-main")
import Model

scenario_one=pd.read_csv("database/Scenario_1/Scenario_1_C.csv")
scenario_two=pd.read_csv("database/Scenario_2/Scenario_2_C.csv")
scenario_three=pd.read_csv("database/Scenario_3/Scenario_3_C.csv")
scenarios=[scenario_one,scenario_two,scenario_three]

In [347]:
def columnwise_na_elimiation(data):
    """
    The method filter out all columns contains NA value in the set

    Input: pandas dataframe
    Output: non-NA containing pandas dataframe 
    """
    col_na=data.isna().sum(axis=0)
    # print(col_na)
    col_na[col_na!=0]=1
    col_na=col_na.astype(bool)
    # print(col_na)
    col_na=~col_na
    # print(col_na)
    filtered_data=data.loc[:,col_na]
    return filtered_data

In [348]:
nanFreeSet=[]
for scenario in scenarios:
    nanfreedata=columnwise_na_elimiation(scenario)
    nanFreeSet.append(nanfreedata)

train_set=[]
testing_set=[]

for nanfreeData in nanFreeSet:
    train, test = train_test_split(nanfreeData, test_size=0.2)
    trainX = train.iloc[:, :-2]
    trainY = train.iloc[:, -2:]
    testX = test.iloc[:, :-2]
    testY = test.iloc[:, -2:]

    scaler = StandardScaler()
    trainX_scaled = scaler.fit_transform(trainX)
    testX_scaled = scaler.transform(testX)

    pca = PCA(n_components=5)
    trainX_pca = pca.fit_transform(trainX_scaled)
    testX_pca = pca.transform(testX_scaled)

    train_set.append((trainX_pca,trainY))
    testing_set.append((testX_pca,testY))


In [349]:
#Training
def training(model,input_lr,trainX,trainY):
    optimizer = optim.SGD(model.parameters(), lr=input_lr)
    mse_loss = Loss.MSELoss()

    # Convert data to tensors
    trainX_tensor = torch.tensor(trainX, dtype=torch.float32)
    trainY_tensor = torch.tensor(np.array(trainY), dtype=torch.float32).reshape(-1, 1)

    # Training loop
    for epoch in range(1000):
        optimizer.zero_grad()
        predicted_result = model(trainX_tensor)
        loss = mse_loss(predicted_result, trainY_tensor,model)
        loss.backward()
        optimizer.step()
    #     if (epoch>990):
    #         print(f"Epoch [{epoch + 1}/10], Loss: {loss.item()}")
    # print(list(model.parameters()))

def scoring(model,testX,testY):
    testX=torch.tensor(testX,dtype=torch.float32)
    return ((mean_squared_error(testY,(model(testX)).detach().numpy())),r2_score(testY,(model(testX)).detach().numpy()))

In [353]:
#2 neurons is the best overall, cause more than that results in overfitting.
for i in range(3):
    for j in range(2,6):
        trainX=train_set[i][0]
        trainY=train_set[i][1].iloc[:,0]
        testX=testing_set[i][0]
        testY=testing_set[i][1].iloc[:,0]
        model=Model.MLPRegression(trainX.shape[1],j,1,2)
        training(model,0.001,trainX,trainY)
        score=scoring(model,testX,testY)
        print(j," number of neurons Fermi_energy for scenario ",str(i+1)," has MSE of: ",score[0]," r2  of ",score[1])

        trainY=train_set[i][1].iloc[:,1]
        testY=testing_set[i][1].iloc[:,1]
        model=Model.MLPRegression(trainX.shape[1],j,1,2)
        training(model,0.001,trainX,trainY)
        score=scoring(model,testX,testY)
        print(j," number of neurons thermo_prob_norm for scenario ",str(i+1)," has MSE of: ",score[0]," r2  of ",score[1])

2  number of neurons Fermi_energy for scenario  1  has MSE of:  1.3011264579601078  r2  of  -0.5097773857662864
2  number of neurons thermo_prob_norm for scenario  1  has MSE of:  0.01632955166283644  r2  of  -0.43002625499645397
3  number of neurons Fermi_energy for scenario  1  has MSE of:  1.3287307939453128  r2  of  -0.5418084016330278
3  number of neurons thermo_prob_norm for scenario  1  has MSE of:  0.01792178914730563  r2  of  -0.569463114868267
4  number of neurons Fermi_energy for scenario  1  has MSE of:  1.1450425986113697  r2  of  -0.3286636441417383
4  number of neurons thermo_prob_norm for scenario  1  has MSE of:  0.02456729329243578  r2  of  -1.15142920931113
5  number of neurons Fermi_energy for scenario  1  has MSE of:  1.368018678260886  r2  of  -0.587396560194686
5  number of neurons thermo_prob_norm for scenario  1  has MSE of:  0.020744990849698275  r2  of  -0.8166990856365479
2  number of neurons Fermi_energy for scenario  2  has MSE of:  1.6097727712377081  r2 