#### <ins>Imports/Installs</ins>



##### <ins>Installing required packages (if missing)</ins>

In [None]:
# !pip install pandas
# !pip install matplotlib
# !pip install scikit-learn
# !pip install xgboost

##### <ins>Import required libs</ins>

In [None]:
import pandas as pd

import matplotlib.pyplot as pyplot

from sklearn import metrics
from sklearn.metrics import accuracy_score

from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

##### <ins>Importing Dataset.csv</ins>

In [None]:
# Load the gyro dataset; the path is relative to the notebook's working directory.
gyro = pd.read_csv('../datasets/gyro/gyro_mobile.csv')

##### <ins>Inspecting The Dataset</ins>

In [None]:
def printSummaryStatistics():
    """Print the describe() summary and the dtype of every column in the global `gyro` frame."""
    for name in gyro.columns:
        column = gyro[name]
        print(f"Column: {name} \n{column.describe()} \nData Type: {column.dtype}\n")

# Show the first rows to get a feel for the layout, then the per-column statistics.
preview = gyro.head()
print(f'{preview}\n')
printSummaryStatistics()


Insights:
- 31991 data points
- Every feature is continuous
- Activity is either 1 or 0 (binary classification)
- Dataset contains a timestamp that might be dropped

#### <ins>Data Preprocessing and Training</ins>

##### <ins>Dropping timestamp and splitting data into training and testing</ins>

In [None]:
# errors='ignore' keeps this cell re-runnable: on a second execution `timestamp`
# has already been removed and a plain drop() would raise KeyError.
gyro = gyro.drop(columns='timestamp', errors='ignore')

# After the drop, the first six columns are used as features and the remaining
# column(s) as the target. random_state pins the shuffle so the split is reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    gyro.iloc[:, :6],
    gyro.iloc[:, 6:],
    test_size=0.2,
    random_state=0
)

##### <ins>Training and Improving</ins><br>
To determine a good number of estimators, a first model is trained with early stopping and a deliberately large number of estimators. This yields the best number of iterations, which is then used to train a second model.

In [None]:
# Donor model: only used to find a good number of boosting rounds via early stopping.
preModel = XGBClassifier(
    objective='binary:logistic',
    n_estimators=10000,             # deliberately large upper bound that is not meant to be reached
    early_stopping_rounds=20,       # stop after this many rounds without improvement
    max_depth=2,
    learning_rate=0.1
)

# Evaluation sets, in order: training split first, test split second.
# NOTE(review): early stopping is driven by these sets, and the test split is one
# of them, so the reported test metrics are likely optimistic -- consider a
# separate validation split.
evaldata = [(xtrain, ytrain), (xtest, ytest)]

preModel.fit(xtrain, ytrain, eval_set=evaldata, verbose=False)

bIter = preModel.best_iteration     # 0-based index of the best boosting round

# Final model, retrained with exactly the number of trees the donor model found best.
model = XGBClassifier(
    objective='binary:logistic',
    # tree_method = 'exact',
    n_estimators=bIter + 1,         # best_iteration is 0-based: best round k means k + 1 trees
    max_depth=2,
    learning_rate=0.1,
    base_score=0.5
)

# eval_set is passed again so that model.evals_result() is populated for the loss curves.
model.fit(xtrain, ytrain, eval_set=evaldata, verbose=False)

yhat = model.predict(xtest)

#### <ins>Func Definitions</ins>

##### <ins>Performance Metrics and Evaluation</ins>

In [None]:
def printConfusionMatrix():
    """Draw the confusion matrix of the global `model` on the test split and show it."""
    metrics.ConfusionMatrixDisplay.from_estimator(
        estimator=model,
        X=xtest,
        y=ytest,
        cmap='Blues',
    )
    pyplot.show()

def plotLossCurves():
    """Plot the log-loss per boosting round for both evaluation sets of the global `model`.

    Expects `model` to have been fitted with a two-entry `eval_set` (training
    split first, test split second), as done in the training cell.
    """
    # Per-round metric history recorded during fit().
    results = model.evals_result()

    # 'validation_0' / 'validation_1' follow the order of the eval_set entries,
    # so the second curve is the test split and must be labelled as such.
    pyplot.plot(results['validation_0']['logloss'], label='train')
    pyplot.plot(results['validation_1']['logloss'], label='test')

    # Axis labels and legend.
    pyplot.xlabel('Iterations')
    pyplot.ylabel('Log Loss')
    pyplot.legend()

    pyplot.show()

def printClassReport():
    """Print scikit-learn's classification report for `yhat` against `ytest`, using three digits."""
    report = metrics.classification_report(ytest, yhat, digits=3)
    print(report)

def printMisc():
    """Print miscellaneous facts about the final model and its test predictions.

    Reads the notebook globals `model`, `bIter`, `xtest`, `ytest` and `yhat`.
    """
    # Tree count is taken from the model itself rather than from the
    # early-stopping index, so the two lines below report distinct quantities.
    print(f'# Trees: \t{model.n_estimators}')
    print(f'Test Accuracy: \t{accuracy_score(ytest, yhat)}')
    print(f'Base_Score: \t{model.base_score}')
    print(f'Best Iteration: {bIter}')
    print(f'\nPredict_Proba Return: \n{model.predict_proba(xtest)}')

##### <ins>Porting the Model to C</ins>

In [None]:
def portToC(model, path='../exported_models/currentExport.c'):
    """Export `model` as C source code via m2cgen and write it to `path`.

    Parameters
    ----------
    model : fitted estimator accepted by ``m2cgen.export_to_c``.
    path : destination file; defaults to the notebook's usual export location.
    """
    import os
    import m2cgen as m2c    # kept local, as before: only needed when exporting

    # Generate the code BEFORE opening the file: opening with 'w' truncates
    # immediately, so a failing export would otherwise wipe the previous file.
    code = m2c.export_to_c(model)

    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(path, 'w') as f:
        f.write(code)

# Export the trained model right away; this writes the C file every time the cell runs.
portToC(model)
        

##### <ins>Generating Code for Lazy People</ins>

In [None]:
def _genInfer(ctype, start, size, time):
    """Print an Arduino-style `infer()` function that scores a slice of the test set.

    Shared implementation behind genFloat/genDouble, which differ only in the C
    type used for the feature and result arrays.

    Parameters
    ----------
    ctype : C type name emitted for the arrays ('float' or 'double').
    start : index of the first row of the global `xtest` to emit.
    size : number of consecutive rows to emit.
    time : value emitted for the C variable `time`, which is passed to delay().
    """
    length = 2                  # element count of the emitted C `result` array
    rows = xtest.values         # materialise the feature matrix once, not once per row

    print('void infer() {')
    print('\t// Printing Range:')
    print(f'\tSerial.println("Start: {start} | End: {start+size}");\n')
    print('\tSerial.println("aScore0,aScore1");')

    print('\t// Declarations:')
    print(f'\tint length = {length};')
    print(f'\t{ctype} result[length];')
    print(f'\tint time = {time};\n')

    print('\t// Model Inference')
    for x in range(start, start + size):
        # One C array literal per test row, followed by the predicted label.
        values = ', '.join(str(v) for v in rows[x])
        print(f'\t{ctype} x_{x}[] = {{{values}}};')
        print(f'\tint y_{x} = {yhat[x]};')
        print(f'\tscore(x_{x}, result);')
        # print(f'\tprintScoreCompare(result, length, y_{x});')
        print(f'\tprintScoreCompareCSV(result, length, y_{x});')
        print('\tdelay(time);\n')
    print('}')


def genFloat(start=0, size=500, time=100):
    """Print the `infer()` C function using `float` arrays for rows start..start+size-1 of `xtest`."""
    _genInfer('float', start, size, time)


def genDouble(start=0, size=500, time=100):
    """Print the `infer()` C function using `double` arrays for rows start..start+size-1 of `xtest`."""
    _genInfer('double', start, size, time)

##### <ins>Generating Inference Data</ins>

In [None]:
def generateProbDF(localCapture=None, features_test=None):
    """Build a frame with the predicted label and both class probabilities per row.

    Parameters
    ----------
    localCapture : classifier exposing `predict` and `predict_proba`;
        defaults to the notebook's global `model`.
    features_test : feature rows to score; defaults to the global `xtest`.

    Returns
    -------
    pandas.DataFrame with columns 'Label', 'Prob0' and 'Prob1'
    (probabilities rounded to four decimals).
    """
    # Resolve the defaults at call time. Binding `model`/`xtest` in the signature
    # would freeze whatever objects existed when this cell was executed, so a
    # retrained model would silently be ignored.
    if localCapture is None:
        localCapture = model
    if features_test is None:
        features_test = xtest

    probabilities = localCapture.predict_proba(features_test).tolist()

    return pd.DataFrame({
        'Label': localCapture.predict(features_test),
        'Prob0': [round(row[0], 4) for row in probabilities],
        'Prob1': [round(row[1], 4) for row in probabilities],
    })

def exportProbDF(probDF=None, path='../datasets/gyro/baseCapture.csv'):
    """Write the probability frame to CSV.

    Parameters
    ----------
    probDF : frame to export; when omitted it is generated via generateProbDF().
    path : destination CSV file; defaults to the notebook's usual location.
    """
    # Generate the default at call time: a call in the signature would run the
    # prediction once, when this cell is defined, and reuse that stale result.
    if probDF is None:
        probDF = generateProbDF()
    probDF.to_csv(path)

def importInoCapture(path='../datasets/gyro/inoCapture.csv'):
    """Read the captured serial output CSV and drop its last row.

    Parameters
    ----------
    path : CSV file to read; defaults to the notebook's usual capture location.

    Returns
    -------
    pandas.DataFrame without the final row of the file.
    """
    serial = pd.read_csv(path)
    # The last row is the "##### REPEATING... #####" trailer of the capture, not data.
    serial = serial.truncate(after=(len(serial) - 2))
    return serial

def generateComparison(probDF=None, inoCapture=None,
                       path='../datasets/gyro/compared-gyro-float.csv'):
    """Join the model's probabilities with the captured device output and save as CSV.

    Parameters
    ----------
    probDF : reference frame; when omitted it is generated via generateProbDF().
    inoCapture : captured frame; when omitted it is loaded via importInoCapture().
    path : destination CSV file; defaults to the notebook's usual location.
    """
    # Resolve defaults at call time. Calls in the signature run when the cell is
    # defined, which would read the capture file (and fail if it is missing)
    # even if this function is never called.
    if probDF is None:
        probDF = generateProbDF()
    if inoCapture is None:
        inoCapture = importInoCapture()

    # Keep only as many reference rows as were captured, then align by index.
    probDF = probDF.truncate(after=(len(inoCapture) - 1))
    probDF = probDF.join(inoCapture)
    probDF.to_csv(path)

#### <ins>Main</ins>

##### <ins>Evaluation Metrics</ins>

In [None]:
# printConfusionMatrix()
# plotLossCurves()
# printClassReport()
# printMisc()

##### <ins>Code Generation</ins>

In [None]:
# genFloat()    # param: size
# genDouble()   # param: size

##### <ins>Generate C-Port</ins>

In [None]:
# portToC(model)

##### <ins>Generate Inference Data and Comparison</ins>

In [None]:
# generateProbDF()
# importInoCapture()
# exportProbDF()
# generateComparison()