In [323]:
import sys

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Project-local modules live outside the default import path.
sys.path.append("FederatedLearning-main")
import Model

# Load the three scenario tables in order; `scenarios` keeps them as a list
# while the individual names remain available for ad-hoc inspection.
scenarios = [
    pd.read_csv(csv_path)
    for csv_path in (
        "database/Scenario_1/Scenario_1_C.csv",
        "database/Scenario_2/Scenario_2_C.csv",
        "database/Scenario_3/Scenario_3_C.csv",
    )
]
scenario_one, scenario_two, scenario_three = scenarios

In [324]:
# NaN elimination helper (standard scaling is applied later, after the train/test split)
def columnwise_na_elimiation(data):
    """
    Keep only the columns of ``data`` that contain no missing values.

    Input: pandas dataframe
    Output: pandas dataframe restricted to the columns with zero NA entries
    """
    # A column survives only when every one of its cells is non-missing.
    fully_populated = data.notna().all(axis=0)
    return data.loc[:, fully_populated]

In [326]:
# NaN elimination, train/test split, scaling and PCA for every scenario.

# Tunable constants for this stage.
SPLIT_SEED = 42        # fixed seed so the split (and every downstream metric) is reproducible
TEST_FRACTION = 0.2    # share of rows held out for testing
N_PCA_COMPONENTS = 5   # number of principal components kept as model inputs

# Drop every column that contains at least one NaN, scenario by scenario.
nanFreeSet=[]
for scenario in scenarios:
    nanfreedata=columnwise_na_elimiation(scenario)
    nanFreeSet.append(nanfreedata)

# Each entry is a (features, targets) pair: features are a PCA-projected
# numpy array, targets are the last two columns as a DataFrame.
train_set=[]
testing_set=[]

for nanfreeData in nanFreeSet:
    # Without random_state the split changes on every run, so reported
    # scores could not be reproduced; seed it explicitly.
    train, test = train_test_split(
        nanfreeData, test_size=TEST_FRACTION, random_state=SPLIT_SEED
    )
    # The last two columns are the regression targets, everything else is input.
    trainX = train.iloc[:, :-2]
    trainY = train.iloc[:, -2:]
    testX = test.iloc[:, :-2]
    testY = test.iloc[:, -2:]

    # Fit the scaler on the training rows only, then reuse it for the test
    # rows so no test-set statistics leak into preprocessing.
    scaler = StandardScaler()
    trainX_scaled = scaler.fit_transform(trainX)
    testX_scaled = scaler.transform(testX)

    # Same fit-on-train / apply-to-test discipline for the PCA projection.
    pca = PCA(n_components=N_PCA_COMPONENTS)
    trainX_pca = pca.fit_transform(trainX_scaled)
    testX_pca = pca.transform(testX_scaled)

    train_set.append((trainX_pca,trainY))
    testing_set.append((testX_pca,testY))


In [342]:
#Training
def training(model,input_lr,trainX,trainY,epochs=1000):
    """
    Fit ``model`` in place with full-batch SGD.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose parameters are optimised; it is mutated in place.
    input_lr : float
        Learning rate passed to ``optim.SGD``.
    trainX : array-like
        Training features; converted to a float32 tensor.
    trainY : array-like
        Training targets; converted to a float32 tensor and reshaped to a
        single column of shape (n_samples, 1).
    epochs : int, optional
        Number of full-batch gradient steps. Defaults to 1000, the value
        that was previously hard-coded.

    Returns
    -------
    None
    """
    optimizer = optim.SGD(model.parameters(), lr=input_lr)
    # NOTE(review): `Loss` is not imported in any visible cell; presumably a
    # project module whose MSELoss also takes the model (e.g. for a
    # regularisation term) -- confirm and add the import to the imports cell.
    mse_loss = Loss.MSELoss()

    # Convert data to tensors
    trainX_tensor = torch.tensor(trainX, dtype=torch.float32)
    trainY_tensor = torch.tensor(np.array(trainY), dtype=torch.float32).reshape(-1, 1)

    # Training loop: every step uses the whole training set (no mini-batches).
    for _ in range(epochs):
        optimizer.zero_grad()
        predicted_result = model(trainX_tensor)
        loss = mse_loss(predicted_result, trainY_tensor,model)
        loss.backward()
        optimizer.step()

def scoring(model,testX,testY):
    """
    Evaluate ``model`` on held-out data.

    Parameters
    ----------
    model : torch.nn.Module
        Trained model, called on the float32 feature tensor.
    testX : array-like
        Test features; converted to a float32 tensor.
    testY : array-like
        True target values, passed to the sklearn metrics unchanged.

    Returns
    -------
    tuple
        ``(mean_squared_error, r2_score)`` of the predictions against ``testY``.
    """
    features = torch.tensor(testX, dtype=torch.float32)
    # Run the forward pass once (it used to run twice, once per metric) and
    # skip autograd bookkeeping since no gradients are needed for scoring.
    with torch.no_grad():
        predictions = model(features).numpy()
    return (mean_squared_error(testY, predictions), r2_score(testY, predictions))

In [344]:
# Train and score one single-output linear model per (scenario, target) pair.
# Each entry pairs a target-column position in the Y frame with the label
# that prefixes its report line.
# NOTE(review): the labels assume the last two CSV columns are Fermi_energy
# followed by thermo_prob_norm -- confirm against the data files.
TARGET_REPORTS = (
    (0, "Fermi_energy for scenario "),
    (1, "thermo_prob_norm for scenario "),
)
LEARNING_RATE = 0.001

# Iterate over whatever scenarios were prepared instead of a hard-coded count.
for i, ((trainX, trainY_all), (testX, testY_all)) in enumerate(zip(train_set, testing_set)):
    for target_col, report_prefix in TARGET_REPORTS:
        trainY = trainY_all.iloc[:, target_col]
        testY = testY_all.iloc[:, target_col]

        # Fresh model per target so the two regressions share no weights.
        model = Model.LinearRegression(trainX.shape[1], 1)
        training(model, LEARNING_RATE, trainX, trainY)
        score = scoring(model, testX, testY)
        print(report_prefix, str(i+1), " has MSE of: ", score[0], " r2  of ", score[1])
    

Fermi_energy for scenario  1  has MSE of:  1.251528492758159  r2  of  -0.45685475073674464
thermo_prob_norm for scenario  1  has MSE of:  0.010605425000298385  r2  of  0.0918002878642219
Fermi_energy for scenario  2  has MSE of:  1.5215408891895215  r2  of  -0.23529014324160036
thermo_prob_norm for scenario  2  has MSE of:  0.040712470774379235  r2  of  0.06990209796458113
Fermi_energy for scenario  3  has MSE of:  1.2654416032419364  r2  of  -0.4780324397467359
thermo_prob_norm for scenario  3  has MSE of:  0.0028718277777898668  r2  of  0.7499324555885263
