In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import random
import operator
import pandas as pd
import numpy as np
import os
import pprint
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE
import pandasql as sqldf

Ziel dieses Notebooks ist es die Hyperparameter Lernrate und Epochenanzahl für das gegebene Neuronale Netz zu optimieren. Hierfür wird eine Klasse verwendet, die in gegebenen Intervallen verschiedene Kombinationen dieser Parameter geordnet ausprobiert (kein random search) und das beste Resultat, basierend auf übergebenen Trainingsdaten zurück gibt. 

In [2]:
def load_data(directory = 'sliced_data'):
    '''
        Funktion, die die aufbereiteten/vorbereiteten Daten aus existierenden CSV Dateien 
        einliest und je nach Vollständigkeit in zwei Dictionaries abspeichert,
        geschlüsselt nach der jeweiligen RaceId
    '''
    #Ids von Rennen, die als Regenrennen identifiziert wurden
    rain_id = [847,861,879,910,914,934,942,953,957,967,950,982]
    if os.path.exists(directory):
        csv_filenames = []
        #auslesen aller csv file dateinamen aus formula 1 datensatz und abspeichern in liste
        for filename in os.listdir(os.getcwd()+'/'+directory):
            typ = filename.split('.')[-1]
            name = filename.split('.')[0]
            if typ == 'csv':
                csv_filenames.append(filename)
        sliced_races = {}
        #einlesen und abspeichern als dataframe aller dateien
        for file in csv_filenames:
            try:
                df = pd.read_csv(directory+'/'+file, engine = 'python', sep = ';', decimal = '.')
                del df['Unnamed: 0']
            except Exception as e:
                df = pd.read_csv(directory+'/'+file, engine = 'c', sep = ';', decimal = '.')
                del df['Unnamed: 0']
                print(e)
            #print(df.head())
            f = int(file.split('_')[-1].split('.')[0])
            df["rain"] = 0
            #setzen der regenkomponente auf 1 für regenrennen
            if list(df["raceId"])[0] in rain_id:
                df["rain"] = 1
            sliced_races[f] = df
        print('Einlesen der sliced Dateien erfolgreich')
    else:
        raise ('sliced Dateien können nicht eingelesen werden, da kein entsprechendes Verzeichnis existiert!')
        
    if os.path.exists('split_data'):
        csv_filenames = []
        #auslesen aller csv file dateinamen aus formula 1 datensatz und abspeichern in liste
        for filename in os.listdir(os.getcwd()+'/split_data'):
            typ = filename.split('.')[-1]
            name = filename.split('.')[0]
            if typ == 'csv':
                csv_filenames.append(filename)
        split_by_race = {}
        #einlesen und abspeichern als dataframe aller dateien
        for file in csv_filenames:
            try:
                df = pd.read_csv('split_data/'+file, engine = 'python', sep = ';', decimal = '.')
                del df['Unnamed: 0']
            except Exception as e:
                df = pd.read_csv('split_data/'+file, engine = 'c', sep = ';', decimal = '.')
                del df['Unnamed: 0']
                print(e)
            f = int(file.split('_')[-1].split('.')[0])
            split_by_race[f] = df
        print('Einlesen der split Dateien erfolgreich')
    else:
        raise('split Dateien können nicht eingelesen werden, da kein entsprechendes Verzeichnis existiert!')
        
    return sliced_races, split_by_race

def train_test (data_dict, test_num = 5, nogo_columns = []):
    train_data = []
    test_data = []
    test_final = {}
    temp_y_podium = []
    test_races = list(data_dict.keys())
    random.shuffle(test_races)
    test_races = test_races[0:test_num]
    for key, value in data_dict.items():
        helper = key
        for did in value.driverId.unique():
            temp = value.where(value.driverId == did).dropna(how = "all")
            if list(temp["podium_position"])[0] < 0: #Top x finish positions
                pp = 1
            else:
                if key in test_races:
                    temp['sum_milliseconds_pro_lap'] = temp['sum_milliseconds_pro_lap']/60000
                    temp_y = list(temp["podium_position"])#list((temp["total_milliseconds"]/60000))
                    #temp_y = temp_y[0]
                    cols = [col for col in temp.columns if col not in nogo_columns]
                    temp_x = temp[cols]
                    stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
                    temp_x = temp_x.tail(1)
                    temp_x['stop_binary'] = stops
                    x_tensor = torch.tensor(temp_x[temp_x.columns].values)
                    #temp_x = x_tensor.float()
                    test_data.append((x_tensor, [temp_y[0]]))
                else:
                    temp['sum_milliseconds_pro_lap'] = temp['sum_milliseconds_pro_lap']/60000
                    temp_y = list(temp["podium_position"])#list((temp["total_milliseconds"]/60000))
                    #temp_y = temp_y[0]
                    cols = [col for col in temp.columns if col not in nogo_columns]
                    temp_x = temp[cols]
                    stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
                    temp_x = temp_x.tail(1)
                    temp_x['stop_binary'] = stops
                    x_tensor = torch.tensor(temp_x[temp_x.columns].values)
                    #temp_x = x_tensor.float()
                    train_data.append((x_tensor, [temp_y[0]]))
        if key in test_races:
            test_final[key]=test_data
        test_data = []
    random.shuffle(train_data)
    #random.shuffle(test_data)
    #test_data = train_data[len(train_data)-100:]
    train_data = train_data#[0:len(train_data)-100]
            
            #break
            #for i, row in temp.iterrows():
            
    return train_data, test_final

Einlesen der Dateien als Dataframes, welche in zwei Dictionaries gespeichert werden:<br>
- Attribut directory verweißt auf den Ordnernamen der sliced Dateien, Default ist 'sliced_data'
- für die split_by_race Daten sollte kein separates Directory angegeben werden müssen, da es keinen Unterschied zwischen den Rennen gibt! (Nur bei Slicing: Wie viele Prozent des Rennens möchte ich? ergibt ein anderes Directory Sinn)

In [3]:
sliced_races, split_by_race = load_data(directory = 'sliced_data50')

Einlesen der sliced Dateien erfolgreich
Einlesen der split Dateien erfolgreich


Aufteilen des Datensatzes in einen Trainingsteil und einen Testteil, test enthält ein Dictionary mit Tensoren, die einer RaceId zugeordnet sind, train ist eine Liste von Tensoren. Test_num gibt die Anzahl Rennen an, die als Testdaten zufällig gewählt werden sollen, nogo_columns bezeichnet die Spalten, die nicht beachtet werden sollen, bisher aber im DS enthalten sind

In [15]:
nogo_columns = ['year', 'podium_position', 'raceId','lap_number','total_laps','driverId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', #'total_laps',
               #'status_clean', 'constructorId',
                'total_milliseconds',
               'lap_in_milliseconds']
train, test = train_test(sliced_races, test_num = 5, nogo_columns = nogo_columns)

Definieren von notwendigen Klassen: 
    
Zuerst wird die Klasse für das Neuronale Netz definiert, danach die Klasse für die Optimierung der Hyperparameter Lernrate und Epochen Anzahl

In [5]:
class Netz(nn.Module):
    def __init__(self):
        super(Netz,self).__init__()
        self.fc1 = nn.Linear(110, 150)
        self.fc2 = nn.Linear(150, 180)
        self.fc3 = nn.Linear(180, 190)
        self.fc4 = nn.Linear(190, 120)
        self.fc5 = nn.Linear(120, 100)
        self.fc6 = nn.Linear(100, 70)
        self.fc7 = nn.Linear(70, 30)
        self.fc8 = nn.Linear(30, 1)
        self.dropout = nn.Dropout()
        
        
    def forward(self,x):
        x = self.fc1(x.float())
        x = F.relu(x.float())
        x = self.fc2(x.float())
        x = F.relu(x.float())
        x = self.dropout(x)
        x = self.fc3(x.float())
        x = F.relu(x.float())
        x = self.fc4(x.float())
        x = F.relu(x.float())
        #x = self.dropout(x)
        x = self.fc5(x.float())
        x = F.relu(x.float())
        x = self.fc6(x.float())
        x = F.relu(x.float())
        x = self.fc7(x.float())
        x = F.relu(x.float())
        x = self.fc8(x.float())
        return x
        

In [6]:
class HP_Optimizer(object):
    
    def __init__(self, lr_range = (0.0001,0.0001), step_size = 0.0001, max_epochs = (2,2), opt = 'Adam', cuda = True):
        
        self.__model = Netz()
        self.__lr = lr_range
        self.__epochs = max_epochs
        self.__optimizer = opt
        self.__steps = step_size
        self.__combination_results = {}
        self.__combination_overview = {}
        self.train_data = None
        self.test_data = None
        self.cuda = cuda
        self.opt_combination = {}
        
        
    def validate_combinations(self):
        
        specifics = {}
        if self.__optimizer == 'Adam':
            
            #definieren der range für die lernratenoptimierung
            lr_s = self.__lr[0]
            lr_e = self.__lr[1]
            
            #wurde eine range für die anzahl der epochen übergeben?
            if self.__epochs[0] == self.__epochs[1]:
                #hyperparamter epochenanzahl wird nicht optimiert
                max_epoch = self.__epochs[0]
                
                if lr_s == lr_e:
                    #es ist keine range für die lernrate gegeben, in der diese optimiert werden soll
                    print('Parameter Epochen und Lernrate können nicht optimiert werden, da kein Intervall übergeben wurde')
    
                    #setzen des optimizers als Adam
                    self.__model = Netz()
                    optimizer = optim.Adam(self.__model.parameters(), lr = lr_s)
                
                    for epoch in range(1,max_epoch):
                        self.__train(epoch, optimizer)  
                        
                    result = self.__test()
                    A = result.prediction.tolist()
                    y = result.target.tolist()
                    mae = MAE(A,y)
                    specifics = {}
                    specifics['lr'] = lr_s
                    specifics['epochen'] = max_epoch
                    
                    key = random.randint(0,10000)
                    while key in list(self.__combination_results.keys()):
                         key = random.randint(0,10000)
                    
                    self.__combination_results[key] = mae
                    self.__combination_overview[key] = specifics
                    
                            
                else:
                
                    for l in np.arange(lr_s,lr_e, self.__steps):
                
                        #setzen des optimizers als Adam
                        self.__model = Netz()
                        optimizer = optim.Adam(self.__model.parameters(), lr = l)
                        #trainieren des modells
                        for epoch in range(1,max_epoch):
                            self.__train(epoch, optimizer)  
                        
                        result = self.__test()
                        A = result.prediction.tolist()
                        y = result.target.tolist()
                        mae = MAE(A,y)
                        specifics = {}
                        specifics['lr'] = l
                        specifics['epochen'] = max_epoch
                        
                        #abspeichern der gewonnenen informationen (MAE nach lr und anzahl durchgeführter epochen)
                        key = random.randint(0,10000)
                        while key in list(self.__combination_results.keys()):
                             key = random.randint(0,10000)
                        
                        self.__combination_results[key] = mae
                        self.__combination_overview[key] = specifics
                    
                    
            else:
                for max_epoch in range(self.__epochs[0], self.__epochs[1]):
                    #definieren der range für die lernratenoptimierung
                    lr_s = self.__lr[0]
                    lr_e = self.__lr[1]
            
                    #hyperparamter epochenanzahl wird nicht optimiert
                        
                    if lr_s == lr_e:
                        #es ist keine range für die lernrate gegeben, in der diese optimiert werden soll
                        #print('Parameter Epochen und Lernrate können nicht optimiert werden, da kein Intervall übergeben wurde')
            
                        #setzen des optimizers als Adam
                        self.__model = Netz()
                        optimizer = optim.Adam(self.__model.parameters(), lr = lr_s)
                        
                        for epoch in range(1,max_epoch):
                            self.__train(epoch, optimizer)  
                                
                        result = self.__test()
                        A = result.prediction.tolist()
                        y = result.target.tolist()
                        mae = MAE(A,y)
                        specifics = {}
                        specifics['lr'] = lr_s
                        specifics['epochen'] = max_epoch
                          
                        key = random.randint(0,10000)
                        while key in list(self.__combination_results.keys()):
                             key = random.randint(0,10000)
                            
                        self.__combination_results[key] = mae
                        self.__combination_overview[key] = specifics
                            
                                    
                    else:
                        
                        for l in np.arange(lr_s,lr_e, self.__steps):
                        
                            #setzen des optimizers als Adam
                            self.__model = Netz()
                            optimizer = optim.Adam(self.__model.parameters(), lr = l)
                            #trainieren des modells
                            for epoch in range(1,max_epoch):
                                self.__train(epoch, optimizer)  
                            
                            result = self.__test()
                            A = result.prediction.tolist()
                            y = result.target.tolist()
                            mae = MAE(A,y)
                            specifics = {}
                            specifics['lr'] = l
                            specifics['epochen'] = max_epoch
                            
                            #abspeichern der gewonnenen informationen (MAE nach lr und anzahl durchgeführter epochen)
                            key = random.randint(0,10000)
                            while key in list(self.__combination_results.keys()):
                                 key = random.randint(0,10000)
                            
                            self.__combination_results[key] = mae
                            self.__combination_overview[key] = specifics
            
            #finden der besten kombination nach minimalstem Error (MAE)
            key_min = min(self.__combination_results.keys(), key=(lambda k: self.__combination_results[k]))
            best_combination = self.__combination_overview[key_min]
            best_combination['mae'] = self.__combination_results[key_min]
            self.opt_combination = best_combination
        else:
            raise ('No valid optimizer given! Try Adam for example!')
            
    def __train(self, epoch, optimizer):
        if self.cuda:
            self.__model.cuda()
            self.__model.train()
            batch_id = 0
            for data, target in self.train_data:
                data = data.cuda()
                target = torch.Tensor(target).unsqueeze(0).cuda()
                shape = target.size()[1]
                target = target.resize(shape,1).cuda()
                optimizer.zero_grad()
                out = self.__model(data)
                #print("Out: ", out, out.size())
                #print("Target: ", target, target.size())
                criterion = nn.MSELoss()
                loss = criterion(out, target)
                loss.backward()
                optimizer.step()
                print("Train Epoche: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch, batch_id *len(data), len(train_data),
                100. * batch_id / len(train_data), loss.item()))
                batch_id +=1
        else:
            self.__model.train()
            batch_id = 0
            for data, target in self.train_data:
                #data = data.cuda()
                target = torch.Tensor(target).unsqueeze(0)#.cuda()
                shape = target.size()[1]
                target = target.resize(shape,1)#.cuda()
                optimizer.zero_grad()
                out = self.__model(data)
                #print("Out: ", out, out.size())
                #print("Target: ", target, target.size())
                criterion = nn.MSELoss()
                loss = criterion(out, target)
                loss.backward()
                optimizer.step()
                #print("Train Epoche: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                #    epoch, batch_id *len(data), len(train_data),
                #100. * batch_id / len(train_data), loss.item()))
                batch_id +=1
            
    def __test(self):
        total = 0
        count = 0
        result_dict = {}
        result = pd.DataFrame(columns = ['target','prediction'])
        help_dict = {}
        if self.cuda:
            self.__model.cuda()
            for key in self.test_data.keys():
                #print(key)
                help_dict = {}
                for data, target in self.test_data[key]:
                    self.__model.eval()
                    #files.listdir(path)
                    data = data.cuda()
                    target = torch.Tensor(target).unsqueeze(0)
                    shape = target.size()[1]
                    target = target.resize(shape,1).cuda()
                    out = self.__model(data).cpu()
                    #print(out)
                    out = out.detach().numpy()
                    #out = np.round(out)
                    target = target.cpu()
                    target = target.detach().numpy()
                    #print(data)
                    #print(data["driverId"])
                    total += abs(out - target[0][0])
                    #print("current_position: ", data[0][0].item())
                    #print("Output: ", out)
                    #print("Target: ", target)
                    help_dict[target[0][0]] = out
                    #print("Difference: ", out - target)
                    count+=1
                #Auslesen der predicteten Werte A und der zugehörigen targets y
                A = [x[0][0] for x in list((help_dict.values()))]
                y = list(help_dict.keys())
                
                #Anfügen der Werte an Result
                t = pd.DataFrame()
                t['target'] = y
                t['prediction_value'] = A
                t = sqldf.sqldf('''select * from t order by prediction_value ASC''')
                t.reset_index(inplace = True)
                t.rename(columns = {'index':'prediction'}, inplace = True)
                t['prediction'] = t['prediction']+1
                
                
                result = result.append(t)
                
        else:
            for raceId in self.test_data.keys():
                #print(key)
                help_dict = {}
                for data, target in self.test_data[raceId]:
                    self.__model.eval()
                    #files.listdir(path)
                    #data = data.cuda()
                    target = torch.Tensor(target).unsqueeze(0)
                    shape = target.size()[1]
                    target = target.resize(shape,1)#.cuda()
                    out = self.__model(data)#.cpu()
                    #print(out)
                    out = out.detach().numpy()
                    #out = np.round(out)
                    #target = target.cpu()
                    target = target.detach().numpy()
                    #print(data)
                    #print(data["driverId"])
                    total += abs(out - target[0][0])
                    #print("current_position: ", data[0][0].item())
                    #print("Output: ", out)
                    #print("Target: ", target)
                    help_dict[target[0][0]] = out
                    #print("Difference: ", out - target)
                    count+=1
                #Auslesen der predicteten Werte A und der zugehörigen targets y
                A = [x[0][0] for x in list((help_dict.values()))]
                y = list(help_dict.keys())
                
                #Anfügen der Werte an Result
                t = pd.DataFrame()
                t['target'] = y
                t['prediction_value'] = A
                t = sqldf.sqldf('''select * from t order by prediction_value ASC''')
                t.reset_index(inplace = True)
                t.rename(columns = {'index':'prediction'}, inplace = True)
                t['prediction'] = t['prediction']+1
                
                result = result.append(t)
        return result
         
            
    def get_all_information(self):
        
        print('Chosen Model:',self.__model)
        print('Learningrate Range:',self.__lr)
        print('Maximum Epochs:', self.__epochs)
        print('Chosen Optimizer:', self.__optimizer)
        print('Result Encoding:', self.__combination_overview)
        print('Results:', self.__combination_results)
        print('Optimale Kombination:', self.opt_combination)
        
    def help(self):
        print('Parameters with defaults:\nlr_range --> (0.0001,0.0001),\nstep_size--> 0.0001,\nmax_epochs-->(2,2),\nopt-->"Adam",\ncuda=True')
        print('lr_range: Tupel with learnrate range')
        print('step_size: float/int for step_size of learnrate')
        print('max_epochs: Tupel with number of epochs range')
        print('opt: Optimizer (by default Adam)')
        print('cuda: True/False if cuda should be used, default = True\n')
        print('Attributes:')
        print('set self.train_data as dictionary with races (form: {raceId: race(dataframe)})')
        print('set self.test_data as dictionary with races (form: {raceId: race(dataframe)})')
        print('self.opt_combination: Dictionary which contains the best combination of the given parameters\n')
        print('Methods:')
        print('call self.validate_combination() to compare all combinations')
        print('get all information/results with self.get_all_information()')

Aufrufen des Optimierers, übergeben der zuvor erzeugten Test- und Trainingsdatensätze, angeben der Intervalle für die die Hyperparameter getestet werden sollen (Lernrate und Epochenanzahl). 

In [7]:
h = HP_Optimizer(lr_range = (0.00001,0.0001),step_size = 0.00001, max_epochs=(1,5),cuda = False)
h.help()
h.train_data = train
h.test_data = test
h.validate_combinations()
h.get_all_information()

Parameters with defaults:
lr_range --> (0.0001,0.0001),
step_size--> 0.0001,
max_epochs-->(2,2),
opt-->"Adam",
cuda=True
lr_range: Tupel with learnrate range
step_size: float/int for step_size of learnrate
max_epochs: Tupel with number of epochs range
opt: Optimizer (by default Adam)
cuda: True/False if cuda should be used, default = True

Attributes:
set self.train_data as dictionary with races (form: {raceId: race(dataframe)})
set self.test_data as dictionary with races (form: {raceId: race(dataframe)})
self.opt_combination: Dictionary which contains the best combination of the given parameters

Methods:
call self.validate_combination() to compare all combinations
get all information/results with self.get_all_information()


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sor

Chosen Model: Netz(
  (fc1): Linear(in_features=110, out_features=150, bias=True)
  (fc2): Linear(in_features=150, out_features=180, bias=True)
  (fc3): Linear(in_features=180, out_features=190, bias=True)
  (fc4): Linear(in_features=190, out_features=120, bias=True)
  (fc5): Linear(in_features=120, out_features=100, bias=True)
  (fc6): Linear(in_features=100, out_features=70, bias=True)
  (fc7): Linear(in_features=70, out_features=30, bias=True)
  (fc8): Linear(in_features=30, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)
Learningrate Range: (1e-05, 0.0001)
Maximum Epochs: (1, 5)
Chosen Optimizer: Adam
Result Encoding: {6907: {'lr': 1e-05, 'epochen': 1}, 4621: {'lr': 2e-05, 'epochen': 1}, 7786: {'lr': 3.0000000000000004e-05, 'epochen': 1}, 8612: {'lr': 4e-05, 'epochen': 1}, 1995: {'lr': 5e-05, 'epochen': 1}, 5925: {'lr': 6e-05, 'epochen': 1}, 7085: {'lr': 7.000000000000001e-05, 'epochen': 1}, 9093: {'lr': 8e-05, 'epochen': 1}, 2904: {'lr': 9e-05, 'epochen': 1

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Es werden die als optimal in den gegebenen Intervallen genommenen Werte aus dem Optimierer genommen und ein neues Modell mit diesen Parametern trainiert. Zum Schluss werden die Ergebnisse des Testlaufes auf den Testdaten ausgegeben.

In [17]:
max_epochs = h.opt_combination['epochen']
lr = h.opt_combination['lr']

cuda = input('Cuda? [y/n]: ')
model = Netz()
if cuda.lower() == 'y':
    model.cuda() 

    
optimizer = optim.Adam(model.parameters(), lr = lr)     

def train_cuda(epoch):
    model.train()
    batch_id = 0
    for data, target in train:
        #data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0).cuda()
        shape = target.size()[1]
        target = target.resize(shape,1).cuda()
        optimizer.zero_grad()
        out = model(data)
        #print("Out: ", out, out.size())
        #print("Target: ", target, target.size())
        criterion = nn.MSELoss()
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        print("Train Epoche: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
            epoch, batch_id *len(data), len(train),
        100. * batch_id / len(train), loss.item()))
        batch_id +=1
        
    #random.shuffle(train_data)
def train_(epoch):
    model.train()
    batch_id = 0
    for data, target in train:
        #data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0)#.cuda()
        shape = target.size()[1]
        target = target.resize(shape,1)#.cuda()
        optimizer.zero_grad()
        out = model(data)
        #print("Out: ", out, out.size())
        #print("Target: ", target, target.size())
        criterion = nn.MSELoss()
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        print("Train Epoche: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
            epoch, batch_id *len(data), len(train),
        100. * batch_id / len(train), loss.item()))
        batch_id +=1
        
    #random.shuffle(train_data)
if cuda.lower() == 'y':
    for epoch in range(1,max_epochs):
        train_cuda(epoch) 
else:
    for epoch in range(1,max_epochs):
        train_(epoch)  
        

Cuda? [y/n]: n






























































In [18]:
def test_times_cuda(test_final):
    total = 0
    count = 0
    result_dict = {}
    help_dict = {}
    for key in test_final.keys():
        #print(key)
        help_dict = {}
        for data, target in test_final[key]:
            model.eval()
            #files.listdir(path)
            data = data.cuda()
            target = torch.Tensor(target).unsqueeze(0)
            shape = target.size()[1]
            target = target.resize(shape,1).cuda()
            out = model(data).cpu()
            #print(out)
            out = out.detach().numpy()
            #out = np.round(out)
            target = target.cpu()
            target = target.detach().numpy()
            #print(data)
            #print(data["driverId"])
            total += abs(out - target[0][0])
            #print("current_position: ", data[0][0].item())
            #print("Output: ", out)
            #print("Target: ", target)
            help_dict[target[0][0]] = out
            #print("Difference: ", out - target)
            count+=1
        #help_dict = sorted(help_dict.items(), key=operator.itemgetter(1))
        result_dict[key] = help_dict
        
    return result_dict

def test_times(test_final):
    total = 0
    count = 0
    result_dict = {}
    help_dict = {}
    for key in test_final.keys():
        #print(key)
        help_dict = {}
        for data, target in test_final[key]:
            model.eval()
            #files.listdir(path)
            #data = data.cuda()
            target = torch.Tensor(target).unsqueeze(0)
            shape = target.size()[1]
            target = target.resize(shape,1)#.cuda()
            out = model(data)#.cpu()
            #print(out)
            out = out.detach().numpy()
            #out = np.round(out)
            #target = target.cpu()
            target = target.detach().numpy()
            #print(data)
            #print(data["driverId"])
            total += abs(out - target[0][0])
            #print("current_position: ", data[0][0].item())
            #print("Output: ", out)
            #print("Target: ", target)
            help_dict[target[0][0]] = out
            #print("Difference: ", out - target)
            count+=1
        #help_dict = sorted(help_dict.items(), key=operator.itemgetter(1))
        result_dict[key] = help_dict
        
    return result_dict

if cuda.lower() =='y':
    total_results = test_times_cuda(test)
else:
    total_results = test_times(test)


In [19]:
result_dfs = {}
for raceId, dict in total_results.items():    
    #Auswerten der Vorhersagen aus den Outputdaten des Modells
    A = [x[0][0] for x in list((dict.values()))]
    y = list(dict.keys())
    t = pd.DataFrame(columns = ['target', 'prediction'])
    t['target'] = y
    t['prediction'] = A
    #sortieren des DataFrames nach den vorhergesagten Positionen, aufsteigend
    end = sqldf.sqldf('''select * from t order by prediction ASC''')
    end.reset_index(inplace = True)
    #Da DF nun nach prediction aufsteigend sortiert ist, kann veränderter Index als predictete Position gesetzt werden
    end.rename(columns = {'index': 'predicted_position'},inplace = True)
    #zur Übersichtlichkeit wird nun der endgültige DF nach den richtigen Positionen (target) sortiert (aufsteigend)
    end = sqldf.sqldf('''select * from end order by target ASC''')
    end['predicted_position'] = end['predicted_position']+1
    #umstellen der Spaltenreihenfolge
    end = end[['target', 'predicted_position', 'prediction']]
    result_dfs[raceId] = end

In [20]:
for key, value in result_dfs.items():
    print('RaceId:',key)
    print(value,'\n')

RaceId: 852
    target  predicted_position  prediction
0      1.0                   1    1.794732
1      2.0                   2    2.883790
2      3.0                   7    7.683998
3      4.0                   3    3.230210
4      5.0                   9    9.997762
5      6.0                   4    3.871595
6      7.0                   6    7.010408
7      8.0                   5    4.295087
8      9.0                   8    8.238929
9     10.0                  13   12.445101
10    11.0                  11   11.902659
11    12.0                  12   12.144609
12    13.0                  14   12.523632
13    14.0                  16   15.820935
14    15.0                  15   15.770011
15    16.0                  10   11.520939
16    17.0                  17   17.298086
17    18.0                  18   18.589184
18    19.0                  19   18.904297
19    24.0                  20   21.121977 

RaceId: 913
    target  predicted_position  prediction
0      1.0                  

### Anhang

nogo_columns_hannah_kacke = [#'grid',
                #'race_completion',
                'lap_position','circuitId','lap_number',
                'podium_position', 'raceId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', 'total_laps',
               'status_clean', 'constructorId',
                'total_milliseconds', 'driverId'
               'lap_in_milliseconds','year', 'stop_binary','constructorId_1.0',
                 'constructorId_3.0',
                 'constructorId_4.0',
                 'constructorId_5.0',
                 'constructorId_6.0',
                 'constructorId_9.0',
                 'constructorId_10.0',
                 'constructorId_15.0',
                 'constructorId_131.0',
                 'constructorId_164.0',
                 'constructorId_166.0',
                 'constructorId_205.0',
                 'constructorId_206.0',
                 'constructorId_207.0',
                 'constructorId_208.0',
                 'constructorId_209.0',
                 'constructorId_210.0']