In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [2]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides diagnostics emitted by the pandas / scikit-learn
# calls in later cells (e.g. convergence or deprecation warnings) — consider
# narrowing the filter to the specific categories that are known to be noise.
warnings.simplefilter("ignore")

In [4]:
# Directory that holds the raw sensor recordings read by the loading cell.
path = os.path.join("/home/dftml/Project/Sensor_Gas", "Gas_Sensor/")
# Kept for backward compatibility: other cells may rely on the working directory.
os.chdir(path)
# List the directory explicitly (not the implicit cwd) and keep regular files only,
# so stray sub-directories such as `.ipynb_checkpoints` are never handed to read_csv.
# Sorting makes position i refer to the same file on every run.
files = sorted(
    name for name in os.listdir(path) if os.path.isfile(os.path.join(path, name))
)

In [None]:
# Build one cleaned frame per recording file and publish it under the names
# actual_<n>, df_ind<n> (feature columns) and df_dep<n> (CO class label), where
# <n> is the 1-based position of the file in `files`. Later cells look these
# names up in the notebook namespace, so the naming scheme must not change.
SENSOR_1_COLUMNS = [f"R{n}" for n in range(1, 8)]    # R1 .. R7
SENSOR_2_COLUMNS = [f"R{n}" for n in range(8, 15)]   # R8 .. R14
FEATURE_COLUMNS = ["Humidity", "Temperature", "Flow", "Heater", "Sensor_1", "Sensor_2"]

for day, file_name in enumerate(files, start=1):
    df = pd.read_csv(os.path.join(path, file_name))
    # Keep only the first whitespace-separated token of every header.
    df = df.rename(columns={header: header.split()[0] for header in df.columns})

    # Keep rows with 235 <= Flow <= 246 and Temperature >= 21. The explicit
    # .copy() makes df_actual an independent frame, so the edits below modify it
    # reliably instead of acting on a temporary slice of `df`.
    keep = df["Flow"].between(235, 246) & (df["Temperature"] >= 21)
    df_actual = df.loc[keep].copy()

    # Python's round() is kept on purpose: it can differ from numpy rounding on
    # values that sit exactly on a .x5 boundary.
    df_actual["Heater"] = df_actual["Heater"].apply(lambda value: round(value, 1))
    df_actual["Flow"] = df_actual["Flow"].astype(int)          # truncates toward zero
    df_actual["Humidity"] = df_actual["Humidity"].astype(int)  # truncates toward zero
    df_actual = df_actual.drop(columns="Time")

    # Bin the CO reading into class labels: [0, 6) -> "Low", [6, 13) -> "Moderate",
    # [13, inf) -> "High". This used to be a chained `where(..., inplace=True)` on a
    # column of a filtered frame, which does not update the frame when pandas
    # Copy-on-Write is active. Readings outside every bin (negative or NaN) are
    # left untouched, exactly as before.
    co = df_actual["CO"]
    co_label = co.astype(object)
    co_label = co_label.mask((co >= 0) & (co < 6), "Low")
    co_label = co_label.mask((co >= 6) & (co < 13), "Moderate")
    co_label = co_label.mask(co >= 13, "High")
    df_actual["CO"] = co_label

    # Average the two groups of seven R-columns. The built-in sum() adds the
    # columns left to right, i.e. the same arithmetic as R1 + R2 + ... + R7, and
    # (like `+`) propagates NaN.
    df_actual["Sensor_1"] = sum(df_actual[name] for name in SENSOR_1_COLUMNS) / 7
    df_actual["Sensor_2"] = sum(df_actual[name] for name in SENSOR_2_COLUMNS) / 7

    df_ind = df_actual[FEATURE_COLUMNS]
    df_dep = df_actual["CO"]

    # Publish the per-day objects without exec(): same variable names, no
    # string-evaluated code.
    globals()[f"actual_{day}"] = df_actual
    globals()[f"df_ind{day}"] = df_ind
    globals()[f"df_dep{day}"] = df_dep

In [6]:
from sklearn.linear_model import LogisticRegression

In [8]:
def optimization(X_train, y_train, test_sets=None):
    """Fit a grid of LogisticRegression configurations and score each on every day.

    The grid is, in this order: L1 penalty (liblinear, saga), L2 penalty (lbfgs,
    liblinear, newton-cg, sag, saga), elastic-net with saga for l1_ratio
    0.1 .. 0.9, and no penalty (lbfgs, newton-cg, sag, saga) — 20 fits in total.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed to ``LogisticRegression.fit``.
    test_sets : sequence of (X, y) pairs, optional
        Evaluation sets; the k-th pair (1-based) is reported as
        ``testscore_day<k>``. When omitted, the pairs are taken from the module
        globals ``df_ind1 .. df_ind13`` / ``df_dep1 .. df_dep13``.

    Returns
    -------
    list of dict
        One dict per configuration with the keys ``Penalty``, ``Solver``,
        ``l1_ratio`` (a string, ``"None"`` when not applicable), ``train_score``
        and ``testscore_day<k>``, inserted in that order.

    Raises
    ------
    KeyError
        If ``test_sets`` is omitted and one of the expected globals is missing.
    """
    import re
    import sklearn  # local import: only needed to pick the "no penalty" spelling

    if test_sets is None:
        # Direct namespace lookup replaces the former exec()-built statements.
        namespace = globals()
        test_sets = [
            (namespace[f"df_ind{k}"], namespace[f"df_dep{k}"]) for k in range(1, 14)
        ]

    # scikit-learn 1.2 deprecated penalty="none" in favour of penalty=None and 1.4
    # removed the string; releases before 1.2 only accept the string.
    version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
    is_modern = version is None or (int(version.group(1)), int(version.group(2))) >= (1, 2)
    no_penalty = None if is_modern else "none"

    def _fit_and_score(penalty_label, solver, l1_ratio_label, **model_kwargs):
        """Fit one configuration and return its result record."""
        # max_iter must be an int: scikit-learn >= 1.2 rejects the float 1e+4.
        model = LogisticRegression(solver=solver, max_iter=10_000, **model_kwargs)
        model.fit(X_train, y_train)
        record = {
            "Penalty": penalty_label,
            "Solver": solver,
            "l1_ratio": l1_ratio_label,
            "train_score": model.score(X_train, y_train),
        }
        for day, (X_test, y_test) in enumerate(test_sets, start=1):
            record[f"testscore_day{day}"] = model.score(X_test, y_test)
        return record

    results = []

    # Lasso regularization (L1)
    for solver in ("liblinear", "saga"):
        results.append(_fit_and_score("l1", solver, "None", penalty="l1"))

    # Ridge regularization (L2)
    for solver in ("lbfgs", "liblinear", "newton-cg", "sag", "saga"):
        results.append(_fit_and_score("l2", solver, "None", penalty="l2"))

    # Elastic-net regularization (L1 + L2 mix)
    for tenth in range(1, 10):
        ratio = tenth / 10
        results.append(
            _fit_and_score(
                "elasticnet", "saga", str(ratio), penalty="elasticnet", l1_ratio=ratio
            )
        )

    # No regularization; the stored label stays "none" whichever spelling is used.
    for solver in ("lbfgs", "newton-cg", "sag", "saga"):
        results.append(_fit_and_score("none", solver, "None", penalty=no_penalty))

    return results

In [2]:
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider

# Connection settings for the Cassandra cluster.
# Credentials are read from the environment so that no secret is stored in the
# notebook. The token that used to be hard-coded in this cell must be treated
# as leaked and rotated.
#   CASSANDRA_CLIENT_ID / CASSANDRA_CLIENT_SECRET : required
#   CASSANDRA_SECURE_BUNDLE                       : optional, defaults to the
#                                                   bundle path used previously
cloud_config = {
    'secure_connect_bundle': os.environ.get(
        "CASSANDRA_SECURE_BUNDLE",
        '/home/dftml/Project/Sensor_Gas/secure-connect-dftml.zip',
    )
}
try:
    auth_provider = PlainTextAuthProvider(
        os.environ["CASSANDRA_CLIENT_ID"],
        os.environ["CASSANDRA_CLIENT_SECRET"],
    )
except KeyError as missing:
    raise RuntimeError(
        f"Environment variable {missing} is not set; export CASSANDRA_CLIENT_ID and "
        "CASSANDRA_CLIENT_SECRET before running this cell."
    ) from missing

cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
session = cluster.connect()

In [16]:
def load_optimization(table: str, X_train, y_train):
    """Run ``optimization`` and store its results in ``sensor.logistic_optim_<table>``.

    The table is created if it does not exist. Each result record becomes one
    row whose ``id`` is the record's position in the list returned by
    ``optimization``.

    Parameters
    ----------
    table : str
        Suffix of the target table name; letters, digits and underscores only.
    X_train, y_train
        Training data forwarded unchanged to ``optimization``.

    Raises
    ------
    ValueError
        If ``table`` contains characters that are not valid in an unquoted CQL
        identifier (identifiers cannot be bound as query parameters, so the
        suffix is validated before it is interpolated).
    """
    import re

    if not re.fullmatch(r"[A-Za-z0-9_]+", table):
        raise ValueError(f"invalid table suffix: {table!r}")

    score_columns = [f"testscore_day{k}" for k in range(1, 14)]

    optim = pd.DataFrame(optimization(X_train, y_train))
    # reset_index() materializes the positional index as an "index" column,
    # which is stored as the row id.
    optim = optim.drop(columns="index", errors="ignore").reset_index()
    # Select the columns explicitly so the values line up with the CQL column
    # list below regardless of the key order of the result dicts.
    optim = optim[["index", "Penalty", "Solver", "l1_ratio", "train_score", *score_columns]]

    column_types = {
        "id": "int",
        "penalty": "text",
        "solver": "text",
        "l1_ratio": "text",
        "train_score": "float",
    }
    column_types.update((name, "float") for name in score_columns)

    target = f"sensor.logistic_optim_{table}"
    columns_schema = ", ".join(f"{name} {cql_type}" for name, cql_type in column_types.items())
    session.execute(
        f"create table if not exists {target} ({columns_schema}, primary key (id))"
    )

    # Bind values through a prepared statement instead of pasting repr(tuple)
    # into the query text: the repr of a string containing a quote, of NaN, or
    # of a NumPy scalar under NumPy 2 is not valid CQL.
    placeholders = ", ".join(["?"] * len(column_types))
    insert = session.prepare(
        f"insert into {target} ({', '.join(column_types)}) values ({placeholders})"
    )
    for row in optim.itertuples(index=False, name=None):
        # Hand the driver plain Python ints/floats rather than NumPy scalars.
        values = [v.item() if isinstance(v, np.generic) else v for v in row]
        session.execute(insert, values)
        

In [18]:
# Run the sweep once per day: train on day i's frames and store the results in
# the table named after that day.
for i in range(1, 14):
    table = f"day{i}"
    # Look the per-day frames up directly in the notebook namespace rather than
    # going through exec()-built assignment statements.
    X_train = globals()[f"df_ind{i}"]
    y_train = globals()[f"df_dep{i}"]

    load_optimization(table, X_train, y_train)

In [11]:
def best_param(dayno: str):
    """Pick the best configuration stored for ``dayno`` and record it.

    Reads every row of ``sensor.logistic_optim_<dayno>``, computes the mean and
    the population standard deviation (ddof=0) of the ``testscore_day*`` columns
    per row, and selects the row with the highest mean. Ties on the mean are
    broken by the smallest deviation. The winner is written to
    ``sensor.logistic_best_param`` (created if missing) under
    ``id = int(dayno[3:])``.

    Parameters
    ----------
    dayno : str
        Table suffix of the form ``"day<N>"``; letters, digits and underscores
        only.

    Raises
    ------
    ValueError
        If ``dayno`` is not a valid identifier suffix, if the part after the
        first three characters is not an integer, or if the table has no rows.
    """
    import re

    # Identifiers cannot be bound as query parameters, so validate before
    # interpolating into the table name.
    if not re.fullmatch(r"[A-Za-z0-9_]+", dayno):
        raise ValueError(f"invalid day identifier: {dayno!r}")
    day_id = int(dayno[3:])

    df_optim = pd.DataFrame(list(session.execute(f"select * from sensor.logistic_optim_{dayno}")))
    if df_optim.empty:
        raise ValueError(f"sensor.logistic_optim_{dayno} returned no rows")

    # Select the score columns by name. The former positional slice
    # iloc[:, 4:17] only hit the right columns for one particular column order
    # of the `select *` result.
    score_columns = [name for name in df_optim.columns if str(name).startswith("testscore_day")]
    scores = df_optim[score_columns]
    df_optim["test_mean"] = scores.mean(axis=1)
    # ddof=0 matches what np.std(frame, axis=1) computed before.
    df_optim["deviation"] = scores.std(axis=1, ddof=0)

    # Highest mean test score wins; among equal means prefer the least deviated
    # row. The stable sort keeps the earlier row first when deviations tie too.
    top = df_optim[df_optim["test_mean"] == df_optim["test_mean"].max()]
    winner = top.sort_values("deviation", kind="stable").iloc[0]

    columns_schema = """id int, day text, penalty text, solver text, l1_ratio text, deviation float, 
                     test_mean float, train_score float, primary key (id) """

    session.execute(f"create table if not exists sensor.logistic_best_param ({columns_schema})")

    # Bind values through a prepared statement instead of pasting repr(tuple)
    # into the query text, and pass plain Python numbers to the driver.
    insert = session.prepare(
        "insert into sensor.logistic_best_param "
        "(id, day, penalty, solver, l1_ratio, deviation, test_mean, train_score) "
        "values (?, ?, ?, ?, ?, ?, ?, ?)"
    )
    session.execute(
        insert,
        (
            day_id,
            dayno,
            winner["penalty"],
            winner["solver"],
            winner["l1_ratio"],
            float(winner["deviation"]),
            float(winner["test_mean"]),
            float(winner["train_score"]),
        ),
    )

In [12]:
# Record the winning configuration of each of the 13 per-day result tables.
for i in range(1, 14):
    dayno = f"day{i}"
    best_param(dayno)

In [17]:
# Dump each per-day result table to CSV, ordered by and indexed on the row id.
for i in range(1, 14):
    table = f"day{i}"
    df = (
        pd.DataFrame(list(session.execute(f"select * from sensor.logistic_optim_{table}")))
        .sort_values(by="id")
        .set_index("id")
    )
    df.to_csv(f"/home/dftml/Project/Sensor_Gas/Logistic/optim_{table}.csv")