In [1]:
import pandas as pd
import numpy as np
import os

Einlesen des zuvor erzeugten Datensatzes (siehe Datenaufbereitung.ipynb)

In [2]:
df = pd.read_csv('formula1_datenv1.csv', sep = ';', decimal = '.')
del df['Unnamed: 0']
df.shape

(157755, 19)

In [7]:
df.head()

Unnamed: 0,raceId,year,circuitId,grandprix_name,driverId,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructorId,constructor_name,stop_binary,total_laps,race_completion,grid,status_clean,total_milliseconds,form
0,841.0,2011.0,1.0,Australian Grand Prix,1.0,1.0,2.0,100573.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.017241,2.0,Finished,5392556.0,0.0
1,841.0,2011.0,1.0,Australian Grand Prix,1.0,2.0,2.0,93774.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.034483,2.0,Finished,5392556.0,0.0
2,841.0,2011.0,1.0,Australian Grand Prix,1.0,3.0,2.0,92900.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.051724,2.0,Finished,5392556.0,0.0
3,841.0,2011.0,1.0,Australian Grand Prix,1.0,4.0,2.0,92582.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.068966,2.0,Finished,5392556.0,0.0
4,841.0,2011.0,1.0,Australian Grand Prix,1.0,5.0,2.0,92471.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.086207,2.0,Finished,5392556.0,0.0


Anzahl der uniquen Ausprägungen in jeder Spalte

In [8]:
for col in df.columns:
    print(col, len(df[col].unique()))

raceId 137
year 7
circuitId 26
grandprix_name 25
driverId 57
lap_number 78
lap_position 24
lap_in_milliseconds 54290
driver_fullname 57
podium_position 24
constructorId 17
constructor_name 17
stop_binary 2
total_laps 18
race_completion 948
grid 25
status_clean 3
total_milliseconds 2902
form 130


In [10]:
def hot_encode_top (column, df, feat_count = 10):
    '''
    hot one encoding, limitiert auf die feat_count häufigsten features
    eines nominalen features um zu Hohe dimensionen zu vermeiden
    
    column: liste mit einem oder mehr Spaltennamen, die hot encoded werden sollen
    df: dataframe der die Datenbasis darstellt
    feat_count: Anzahl Spalten die für jede Spalte encoded werden
    '''
    df_ = df.copy(deep = True)
    
    for col in column:
        
        
        #nur die häufigsten feat_count Featues werden encoded
        encode_features = [x for x in df_[col].value_counts(ascending = False).head(feat_count).index]
        
        for feature in encode_features:
            col_feature = col + '_'+str(feature)
            #dort wo feature nicht dem encode feature entspricht wird eine 0 gesetzt
            df_[col_feature] = df_.where(df_[col] == feature, other = 0)[col]
            #encode feature selbst wird in dataframe durch eine 1 ersetzt
            df_[col_feature].replace(feature, 1, inplace = True)
        
        #löschen der nun "bereinigten" Spalte
        del df_[col]
        
    return df_

In [11]:
encoded_df = hot_encode_top(['constructorId', 'driverId', 'total_laps','year',"podium_position"], df, 5)
encoded_df.shape

(157755, 39)

In [56]:
encoded_df.head(15)

Unnamed: 0,raceId,circuitId,grandprix_name,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructor_name,stop_binary,...,total_laps_56.0,total_laps_71.0,total_laps_53.0,total_laps_70.0,total_laps_57.0,year_2012.0,year_2016.0,year_2011.0,year_2013.0,year_2014.0
0,841.0,1.0,Australian Grand Prix,1.0,2.0,100573.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,841.0,1.0,Australian Grand Prix,2.0,2.0,93774.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,841.0,1.0,Australian Grand Prix,3.0,2.0,92900.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,841.0,1.0,Australian Grand Prix,4.0,2.0,92582.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,841.0,1.0,Australian Grand Prix,5.0,2.0,92471.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
5,841.0,1.0,Australian Grand Prix,6.0,2.0,92434.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
6,841.0,1.0,Australian Grand Prix,7.0,2.0,92447.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
7,841.0,1.0,Australian Grand Prix,8.0,2.0,92310.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8,841.0,1.0,Australian Grand Prix,9.0,2.0,92612.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
9,841.0,1.0,Australian Grand Prix,10.0,2.0,93121.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


# Hannahs zeug zum split undso

In [3]:
df = pd.read_csv('formula1_datenv1.csv', sep = ';', decimal = '.')
del df['Unnamed: 0']
df.shape

(157755, 19)

In [4]:
df.head()

Unnamed: 0,raceId,year,circuitId,grandprix_name,driverId,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructorId,constructor_name,stop_binary,total_laps,race_completion,grid,status_clean,total_milliseconds,form
0,841.0,2011.0,1.0,Australian Grand Prix,1.0,1.0,2.0,100573.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.017241,2.0,Finished,5392556.0,0.0
1,841.0,2011.0,1.0,Australian Grand Prix,1.0,2.0,2.0,93774.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.034483,2.0,Finished,5392556.0,0.0
2,841.0,2011.0,1.0,Australian Grand Prix,1.0,3.0,2.0,92900.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.051724,2.0,Finished,5392556.0,0.0
3,841.0,2011.0,1.0,Australian Grand Prix,1.0,4.0,2.0,92582.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.068966,2.0,Finished,5392556.0,0.0
4,841.0,2011.0,1.0,Australian Grand Prix,1.0,5.0,2.0,92471.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.086207,2.0,Finished,5392556.0,0.0


# NICHT AUSFÜHREN BRAUCHT EWIG

In [5]:
drivers = df.driverId.tolist()
#Kategorische spalten in dummievariablen umwandeln
df = pd.get_dummies(df, columns=['circuitId', 'driverId', 'constructorId', 'status_clean'])
df['driverId']= drivers
#dictionary enthält einzelne datensätze zu jedem rennen
split_by_race = {}
#grenzwert ab dem vorhersage über rennausgang gemacht werden soll (orientiert sich an race_completion)
border = 0.5
#dictionary, welches renndatensätze nur bis zu einem gewissen zeitpunkt enthält (zp wird nach border gewählt)
sliced_races = {}

for rid in df['raceId'].unique():
    race = df.where(df.raceId == rid).dropna(how = 'all')
    race.reset_index(inplace = True, drop = True)
    split_by_race[rid] = race
    
    #finden der lap_number wo race_completion die angegebene border überschreitet
    last_lap_num = race.where(race.race_completion == min(race.where(race.race_completion >= border).dropna(how = 'all')['race_completion'])).dropna(how = 'all')['lap_number'].unique()[0]
    
    #es werden nur daten aus race genommen, die bis zu dieser lap_number gehen
    race_shortened = race.where(race.lap_number < last_lap_num).dropna(how = 'all')
    race_shortened.reset_index(inplace = True, drop = True)
    
    #hinzufügen einer spalte die die insgesamt gefahrenen millisekunden enthält
    #bis zu dieser Runde pro Fahrer enthält
    race_shortened['sum_milliseconds_pro_lap'] = 0
    
    for did in race_shortened.driverId.unique():
        for lapnum in race_shortened.lap_number.unique():
            sum_ms = np.sum(race_shortened.where(np.logical_and(race_shortened.driverId == did,race_shortened.lap_number<=lapnum)).dropna(how = 'all')['lap_in_milliseconds'])
            
            #setzen der bisher gefahrenen Zeit (kumuliert) pro Fahrer und Runde
            race_shortened.loc[race_shortened.where(np.logical_and(race_shortened.driverId == did,race_shortened.lap_number==lapnum)).dropna(how = 'all').index,'sum_milliseconds_pro_lap'] = sum_ms
    
    sliced_races[rid] = race_shortened
    
    
    

In [6]:
if not os.path.exists('sliced_data'):
    os.makedirs('sliced_data')
    
if not os.path.exists('split_data'):
    os.makedirs('split_data')

for key, value in sliced_races.items():
    name = 'sliced_data/sliced_'+str(int(key))+'.csv'
    value.to_csv(name,sep = ';', decimal = '.')
for key, value in split_by_race.items():
    name = 'split_data/split_'+str(int(key))+'.csv'
    value.to_csv(name,sep = ';', decimal = '.')

In [58]:
sliced_races[841].shape

(789, 120)

In [2]:
rain_id = [847,861,879,910,914,934,942,953,957,967,970,982]
if os.path.exists('sliced_data'):
    csv_filenames = []
    #auslesen aller csv file dateinamen aus formula 1 datensatz und abspeichern in liste
    for filename in os.listdir(os.getcwd()+'/sliced_data'):
        typ = filename.split('.')[-1]
        name = filename.split('.')[0]
        if typ == 'csv':
            csv_filenames.append(filename)
    sliced_races = {}
    #einlesen und abspeichern als dataframe aller dateien
    for file in csv_filenames:
        try:
            df = pd.read_csv('sliced_data/'+file, engine = 'python', sep = ';', decimal = '.')
            del df['Unnamed: 0']
        except Exception as e:
            df = pd.read_csv('sliced_data/'+file, engine = 'c', sep = ';', decimal = '.')
            del df['Unnamed: 0']
            print(e)
        #print(df.head())
        f = int(file.split('_')[-1].split('.')[0])
        df["rain"] = 0
        if list(df["raceId"])[0] in rain_id:
            df["rain"] = 1
        sliced_races[f] = df
    print('Einlesen der sliced Dateien erfolgreich')
else:
    print('Dateien können nicht eingelesen werden, da kein entsprechendes Verzeichnis existiert!')
    
if os.path.exists('split_data'):
    csv_filenames = []
    #auslesen aller csv file dateinamen aus formula 1 datensatz und abspeichern in liste
    for filename in os.listdir(os.getcwd()+'/split_data'):
        typ = filename.split('.')[-1]
        name = filename.split('.')[0]
        if typ == 'csv':
            csv_filenames.append(filename)
    split_by_race = {}
    #einlesen und abspeichern als dataframe aller dateien
    for file in csv_filenames:
        try:
            df = pd.read_csv('split_data/'+file, engine = 'python', sep = ';', decimal = '.')
            del df['Unnamed: 0']
        except Exception as e:
            df = pd.read_csv('split_data/'+file, engine = 'c', sep = ';', decimal = '.')
            del df['Unnamed: 0']
            print(e)
        #print(df.head())
        f = int(file.split('_')[-1].split('.')[0])
        split_by_race[f] = df
    print('Einlesen der split Dateien erfolgreich')
else:
    print('Dateien können nicht eingelesen werden, da kein entsprechendes Verzeichnis existiert!')

Einlesen der sliced Dateien erfolgreich
Einlesen der split Dateien erfolgreich


In [25]:
(sliced_races.keys())

dict_keys([841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 926, 927, 928, 929, 930, 931, 932, 933, 934, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988])

In [38]:
sliced_races[988]

Unnamed: 0,raceId,year,grandprix_name,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructor_name,stop_binary,...,constructorId_207.0,constructorId_208.0,constructorId_209.0,constructorId_210.0,status_clean_DNF,status_clean_Finished,status_clean_lapped,driverId,sum_milliseconds_pro_lap,rain
0,988.0,2017.0,Abu Dhabi Grand Prix,1.0,11.0,113969.0,Fernando Alonso,9.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,113969.0,0
1,988.0,2017.0,Abu Dhabi Grand Prix,2.0,11.0,107148.0,Fernando Alonso,9.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,221117.0,0
2,988.0,2017.0,Abu Dhabi Grand Prix,3.0,11.0,105895.0,Fernando Alonso,9.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,327012.0,0
3,988.0,2017.0,Abu Dhabi Grand Prix,4.0,11.0,105576.0,Fernando Alonso,9.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,432588.0,0
4,988.0,2017.0,Abu Dhabi Grand Prix,5.0,11.0,105504.0,Fernando Alonso,9.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,538092.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
528,988.0,2017.0,Abu Dhabi Grand Prix,23.0,19.0,124783.0,Kevin Magnussen,13.0,Haas F1 Team,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,825.0,2478635.0,0
529,988.0,2017.0,Abu Dhabi Grand Prix,24.0,19.0,104938.0,Kevin Magnussen,13.0,Haas F1 Team,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,825.0,2583573.0,0
530,988.0,2017.0,Abu Dhabi Grand Prix,25.0,19.0,104785.0,Kevin Magnussen,13.0,Haas F1 Team,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,825.0,2688358.0,0
531,988.0,2017.0,Abu Dhabi Grand Prix,26.0,19.0,104990.0,Kevin Magnussen,13.0,Haas F1 Team,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,825.0,2793348.0,0


In [114]:
temp = split_by_race[911]

In [115]:
temp

Unnamed: 0,raceId,year,grandprix_name,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructor_name,stop_binary,...,constructorId_205.0,constructorId_206.0,constructorId_207.0,constructorId_208.0,constructorId_209.0,constructorId_210.0,status_clean_DNF,status_clean_Finished,status_clean_lapped,driverId
0,911.0,2014.0,Belgian Grand Prix,1.0,10.0,123675.0,Jenson Button,6.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,18.0
1,911.0,2014.0,Belgian Grand Prix,2.0,9.0,118275.0,Jenson Button,6.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,18.0
2,911.0,2014.0,Belgian Grand Prix,3.0,8.0,116999.0,Jenson Button,6.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,18.0
3,911.0,2014.0,Belgian Grand Prix,4.0,8.0,116803.0,Jenson Button,6.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,18.0
4,911.0,2014.0,Belgian Grand Prix,5.0,8.0,116559.0,Jenson Button,6.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
853,911.0,2014.0,Belgian Grand Prix,35.0,19.0,119272.0,Jules Bianchi,18.0,Marussia,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,824.0
854,911.0,2014.0,Belgian Grand Prix,36.0,19.0,117850.0,Jules Bianchi,18.0,Marussia,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,824.0
855,911.0,2014.0,Belgian Grand Prix,37.0,19.0,116708.0,Jules Bianchi,18.0,Marussia,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,824.0
856,911.0,2014.0,Belgian Grand Prix,38.0,19.0,117388.0,Jules Bianchi,18.0,Marussia,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,824.0


In [29]:
#setzen der prozentualen Grenze, ab der weiterer Verlauf des Rennens vorhergesagt werden soll
border = 0.7

for race in split_by_race.values():
    
    #finden der lap_number wo race_completion die angegebene border überschreitet
    last_lap_num = race.where(race.race_completion == min(race.where(race.race_completion >= border).dropna(how = 'all')['race_completion'])).dropna(how = 'all')['lap_number'].unique()[0]
    
    #es werden nur daten aus race genommen, die bis zu dieser lap_number gehen
    race_shortened = race.where(race.lap_number < last_lap_num).dropna(how = 'all')
    race_shortened.reset_index(inplace = True, drop = True)
    

In [24]:
temp.where(temp.race_completion == min(temp.where(temp.race_completion >= border).dropna(how = 'all')['race_completion'])).dropna(how = 'all')['lap_number'].unique()[0]

41.0

# ANN Try

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import random

In [12]:
x = temp[['year', 'circuitId', 'driverId',
       'lap_number', 'lap_position', 'lap_in_milliseconds', 'driver_fullname',
       'constructorId', 'stop_binary',
       'race_completion', 'grid', 'status_clean',
       'form']]
y = temp['podium_position']

In [25]:
encoded_df = hot_encode_top(['circuitId','constructorId', 'driverId', 'year', "status_clean"], x, 5)
encoded_df.shape

(1083, 26)

In [26]:
print(encoded_df.columns)
encoded_df.head()

Index(['raceId', 'grandprix_name', 'lap_number', 'lap_position',
       'lap_in_milliseconds', 'driver_fullname', 'constructor_name',
       'stop_binary', 'race_completion', 'grid', 'form', 'circuitId_1.0',
       'constructorId_15.0', 'constructorId_9.0', 'constructorId_6.0',
       'constructorId_1.0', 'constructorId_4.0', 'driverId_1.0',
       'driverId_155.0', 'driverId_18.0', 'driverId_808.0', 'driverId_815.0',
       'year_2011.0', 'status_clean_Finished', 'status_clean_lapped',
       'status_clean_DNF'],
      dtype='object')


Unnamed: 0,raceId,grandprix_name,lap_number,lap_position,lap_in_milliseconds,driver_fullname,constructor_name,stop_binary,race_completion,grid,...,constructorId_4.0,driverId_1.0,driverId_155.0,driverId_18.0,driverId_808.0,driverId_815.0,year_2011.0,status_clean_Finished,status_clean_lapped,status_clean_DNF
0,841.0,Australian Grand Prix,1.0,2.0,100573.0,Lewis Hamilton,McLaren,0.0,0.017241,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0
1,841.0,Australian Grand Prix,2.0,2.0,93774.0,Lewis Hamilton,McLaren,0.0,0.034483,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0
2,841.0,Australian Grand Prix,3.0,2.0,92900.0,Lewis Hamilton,McLaren,0.0,0.051724,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0
3,841.0,Australian Grand Prix,4.0,2.0,92582.0,Lewis Hamilton,McLaren,0.0,0.068966,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0
4,841.0,Australian Grand Prix,5.0,2.0,92471.0,Lewis Hamilton,McLaren,0.0,0.086207,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0


In [27]:
x_tensor = torch.tensor(encoded_df[['lap_number', 'lap_position',
       'lap_in_milliseconds',
       'stop_binary', 'race_completion', 'grid', 'form', 'circuitId_1.0',
       'constructorId_15.0', 'constructorId_9.0', 'constructorId_6.0',
       'constructorId_1.0', 'constructorId_4.0', 'driverId_1.0',
       'driverId_155.0', 'driverId_18.0', 'driverId_808.0', 'driverId_815.0',
       'year_2011.0', 'status_clean_Finished', 'status_clean_lapped',
       'status_clean_DNF']].values)

In [32]:
x_tensor.size()

torch.Size([1083, 22])

In [43]:
lin1 = nn.Linear(22, 40)
lin1(x_tensor.float())

tensor([[ 15269.8994,  -3026.0515,  -2752.2817,  ...,  -9950.7588,
         -10450.6045, -12089.6338],
        [ 14237.4775,  -2821.4431,  -2566.3311,  ...,  -9278.1924,
          -9744.3271, -11272.1572],
        [ 14104.6475,  -2795.1162,  -2542.5476,  ...,  -9191.8955,
          -9653.7158, -11166.8906],
        ...,
        [ 14452.6826,  -2863.9412,  -2611.0034,  ...,  -9426.6699,
          -9907.6748, -11442.5918],
        [ 14586.6182,  -2890.4822,  -2635.3091,  ...,  -9514.2246,
          -9999.6348, -11548.5244],
        [ 14954.8271,  -2963.4519,  -2701.8469,  ...,  -9754.4561,
         -10251.9287, -11839.9326]], grad_fn=<AddmmBackward>)

[841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 926, 927, 928, 929, 930, 931, 932, 933, 934, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988]

## Training and Test Data split and preparation

In [109]:
train_data = []
test_data = []
test_final = {}
temp_y_podium = []
test_races = list(sliced_races.keys())
random.shuffle(test_races)
test_races = test_races[0:5]
nogo_columns_hannah_kacke = [#'grid',
                #'race_completion',
                'lap_position','circuitId','lap_number',
                'podium_position', 'raceId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', 'total_laps',
               'status_clean', 'constructorId',
                'total_milliseconds', 'driverId'
               'lap_in_milliseconds','year', 'stop_binary','constructorId_1.0',
                 'constructorId_3.0',
                 'constructorId_4.0',
                 'constructorId_5.0',
                 'constructorId_6.0',
                 'constructorId_9.0',
                 'constructorId_10.0',
                 'constructorId_15.0',
                 'constructorId_131.0',
                 'constructorId_164.0',
                 'constructorId_166.0',
                 'constructorId_205.0',
                 'constructorId_206.0',
                 'constructorId_207.0',
                 'constructorId_208.0',
                 'constructorId_209.0',
                 'constructorId_210.0']
nogo_columns = ['year', 'podium_position', 'raceId','lap_number','total_laps','driverId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', #'total_laps',
               #'status_clean', 'constructorId',
                'total_milliseconds',
               'lap_in_milliseconds']
for key, value in sliced_races.items():
    helper = key
    for did in value.driverId.unique():
        temp = value.where(value.driverId == did).dropna(how = "all")
        if list(temp["podium_position"])[0] < 0: #Top x finish positions
            pp = 1
        else:
            if key in test_races:
                temp['sum_milliseconds_pro_lap'] = temp['sum_milliseconds_pro_lap']/60000
                temp_y = list(temp["podium_position"])#list((temp["total_milliseconds"]/60000))
                #temp_y = temp_y[0]
                cols = [col for col in temp.columns if col not in nogo_columns]
                temp_x = temp[cols]
                stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
                temp_x = temp_x.tail(1)
                temp_x['stop_binary'] = stops
                x_tensor = torch.tensor(temp_x[temp_x.columns].values)
                #temp_x = x_tensor.float()
                test_data.append((x_tensor, [temp_y[0]]))
            else:
                temp['sum_milliseconds_pro_lap'] = temp['sum_milliseconds_pro_lap']/60000
                temp_y = list(temp["podium_position"])#list((temp["total_milliseconds"]/60000))
                #temp_y = temp_y[0]
                cols = [col for col in temp.columns if col not in nogo_columns]
                temp_x = temp[cols]
                stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
                temp_x = temp_x.tail(1)
                temp_x['stop_binary'] = stops
                x_tensor = torch.tensor(temp_x[temp_x.columns].values)
                #temp_x = x_tensor.float()
                train_data.append((x_tensor, [temp_y[0]]))
    if key in test_races:
        test_final[key]=test_data
    test_data = []
random.shuffle(train_data)
#random.shuffle(test_data)
#test_data = train_data[len(train_data)-100:]
train_data = train_data#[0:len(train_data)-100]
        
        #break
        #for i, row in temp.iterrows():

## Batch ansatz

In [104]:
train_data = []
train_data_x = []
train_data_y = []
test_data = []
count = 0
nogo_columns_hannah = [#'grid',
                #'race_completion',
                'lap_position','circuitId','lap_number',
                'podium_position', 'raceId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', 'total_laps',
               'status_clean', 'constructorId',
                'total_milliseconds', 'driverId'
               'lap_in_milliseconds','year', 'stop_binary','constructorId_1.0',
                 'constructorId_3.0',
                 'constructorId_4.0',
                 'constructorId_5.0',
                 'constructorId_6.0',
                 'constructorId_9.0',
                 'constructorId_10.0',
                 'constructorId_15.0',
                 'constructorId_131.0',
                 'constructorId_164.0',
                 'constructorId_166.0',
                 'constructorId_205.0',
                 'constructorId_206.0',
                 'constructorId_207.0',
                 'constructorId_208.0',
                 'constructorId_209.0',
                 'constructorId_210.0']
nogo_columns = ['year', 'podium_position', 'raceId','lap_number','total_laps','driverId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', #'total_laps',
               #'status_clean', 'constructorId',
                'total_milliseconds',
               'lap_in_milliseconds']
for key, value in sliced_races.items():
    for did in value.driverId.unique():
        temp = value.where(value.driverId == did).dropna(how = "all")
        if list(temp["podium_position"])[0] < 0: #Top x finish positions
            pp = 1
        else:
            if key == 988:
                temp['sum_milliseconds_pro_lap'] = temp['sum_milliseconds_pro_lap']/60000
                temp_y = list(temp["podium_position"])#list((temp["total_milliseconds"]/60000))
                #temp_y = temp_y[0]
                cols = [col for col in temp.columns if col not in nogo_columns]
                temp_x = temp[cols]
                stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
                temp_x = temp_x.tail(1)
                temp_x['stop_binary'] = stops
                x_tensor = torch.tensor(temp_x[temp_x.columns].values)
                test_data.append((x_tensor, [temp_y[0]]))
                
            else:
                temp['sum_milliseconds_pro_lap'] = temp['sum_milliseconds_pro_lap']/60000
                temp_y = list(temp["podium_position"])#list((temp["total_milliseconds"]/60000))
                #temp_y = temp_y[0]
                cols = [col for col in temp.columns if col not in nogo_columns]
                temp_x = temp[cols]
                stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
                temp_x = temp_x.tail(1)
                temp_x['stop_binary'] = stops
                x_tensor = torch.tensor(temp_x[temp_x.columns].values)
                #temp_x = x_tensor.float()
                #train_data.append((x_tensor, [temp_y[0]]))
                train_data_x.append(x_tensor)
                train_data_y.append([temp_y[0]])
            
    #print(count)
    #count +=1
    if count == 137:
        break
    train_data.append((torch.stack(train_data_x), train_data_y))
    
    train_data_x = []
    train_data_y = []
print("hier", len(train_data))            
random.shuffle(train_data)
random.shuffle(test_data)
#test_data = train_data[len(train_data)-100:]
train_data = train_data#[0:len(train_data)-100]
print("hier", len(train_data))            
        #break
        #for i, row in temp.iterrows():

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
hier 136
hier 136


In [87]:
train_data[0]

(tensor([[[ 2.0000,  7.0000,  0.4828,  ...,  0.0000, 43.6360,  0.0000]],
 
         [[ 7.0000,  7.0000,  0.4828,  ...,  0.0000, 44.5796,  0.0000]],
 
         [[15.0000,  7.0000,  0.4828,  ...,  0.0000, 45.8120,  0.0000]],
 
         ...,
 
         [[16.0000,  7.0000,  0.4828,  ...,  1.0000, 45.9951,  0.0000]],
 
         [[18.0000,  7.0000,  0.4828,  ...,  0.0000, 56.5002,  0.0000]],
 
         [[17.0000,  7.0000,  0.4828,  ...,  0.0000, 46.9338,  0.0000]]],
        dtype=torch.float64),
 [[2.0],
  [6.0],
  [24.0],
  [24.0],
  [12.0],
  [3.0],
  [8.0],
  [11.0],
  [4.0],
  [7.0],
  [9.0],
  [10.0],
  [5.0],
  [1.0],
  [24.0],
  [24.0],
  [24.0],
  [24.0],
  [24.0],
  [13.0],
  [24.0],
  [14.0]])

In [217]:
temp_x.sum(axis = 0)[2]

0.0

In [40]:
temp[cols]

Unnamed: 0,lap_position,stop_binary,race_completion,grid,form,circuitId_1.0,circuitId_2.0,circuitId_3.0,circuitId_4.0,circuitId_5.0,...,constructorId_206.0,constructorId_207.0,constructorId_208.0,constructorId_209.0,constructorId_210.0,status_clean_DNF,status_clean_Finished,status_clean_lapped,sum_milliseconds_pro_lap,rain
506,20.0,0.0,0.018182,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,120490.0,0.0
507,20.0,0.0,0.036364,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,229189.0,0.0
508,20.0,0.0,0.054545,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,336592.0,0.0
509,20.0,0.0,0.072727,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,443352.0,0.0
510,20.0,0.0,0.090909,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,550549.0,0.0
511,20.0,0.0,0.109091,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,657506.0,0.0
512,19.0,0.0,0.127273,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,763850.0,0.0
513,19.0,0.0,0.145455,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,870174.0,0.0
514,19.0,0.0,0.163636,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,976417.0,0.0
515,19.0,0.0,0.181818,14.0,13.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1082838.0,0.0


In [42]:
train_data[0][0][0][0]

tensor(5., dtype=torch.float64)

# YAYYYYY

In [None]:
train_data = []
test_data = []
test_final = {}
temp_y_podium = []
test_races = list(sliced_races.keys())
random.shuffle(test_races)
test_races = test_races[0:5]
nogo_columns_hannah_kacke = [#'grid',
                #'race_completion',
                'lap_position','circuitId','lap_number',
                'podium_position', 'raceId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', 'total_laps',
               'status_clean', 'constructorId',
                'total_milliseconds', 'driverId'
               'lap_in_milliseconds','year', 'stop_binary','constructorId_1.0',
                 'constructorId_3.0',
                 'constructorId_4.0',
                 'constructorId_5.0',
                 'constructorId_6.0',
                 'constructorId_9.0',
                 'constructorId_10.0',
                 'constructorId_15.0',
                 'constructorId_131.0',
                 'constructorId_164.0',
                 'constructorId_166.0',
                 'constructorId_205.0',
                 'constructorId_206.0',
                 'constructorId_207.0',
                 'constructorId_208.0',
                 'constructorId_209.0',
                 'constructorId_210.0']
nogo_columns = ['year', 'podium_position', 'raceId','lap_number','total_laps','driverId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', #'total_laps',
               #'status_clean', 'constructorId',
                'total_milliseconds',
               'lap_in_milliseconds']
for key, value in sliced_races.items():
    helper = key
    for did in value.driverId.unique():
        temp = value.where(value.driverId == did).dropna(how = "all")
        if list(temp["podium_position"])[0] < 0: #Top x finish positions
            pp = 1
        else:
            if key in test_races:
                temp['sum_milliseconds_pro_lap'] = temp['sum_milliseconds_pro_lap']/60000
                temp_y = list(temp["podium_position"])#list((temp["total_milliseconds"]/60000))
                #temp_y = temp_y[0]
                cols = [col for col in temp.columns if col not in nogo_columns]
                temp_x = temp[cols]
                stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
                temp_x = temp_x.tail(1)
                temp_x['stop_binary'] = stops
                x_tensor = torch.tensor(temp_x[temp_x.columns].values)
                #temp_x = x_tensor.float()
                test_data.append((x_tensor, [temp_y[0]]))
            else:
                temp['sum_milliseconds_pro_lap'] = temp['sum_milliseconds_pro_lap']/60000
                temp_y = list(temp["podium_position"])#list((temp["total_milliseconds"]/60000))
                #temp_y = temp_y[0]
                cols = [col for col in temp.columns if col not in nogo_columns]
                temp_x = temp[cols]
                stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
                temp_x = temp_x.tail(1)
                temp_x['stop_binary'] = stops
                x_tensor = torch.tensor(temp_x[temp_x.columns].values)
                #temp_x = x_tensor.float()
                train_data.append((x_tensor, [temp_y[0]]))
    if key in test_races:
        test_final[key]=test_data
    test_data = []
random.shuffle(train_data)
#random.shuffle(test_data)
#test_data = train_data[len(train_data)-100:]
train_data = train_data#[0:len(train_data)-100]
        
        #break
        #for i, row in temp.iterrows():

In [79]:
class Netz(nn.Module):
    def __init__(self):
        super(Netz,self).__init__()
        self.fc1 = nn.Linear(110, 150)
        self.fc2 = nn.Linear(150, 180)
        self.fc3 = nn.Linear(180, 190)
        self.fc4 = nn.Linear(190, 120)
        self.fc5 = nn.Linear(120, 100)
        self.fc6 = nn.Linear(100, 70)
        self.fc7 = nn.Linear(70, 30)
        self.fc8 = nn.Linear(30, 1)
        self.dropout = nn.Dropout()
        
        
    def forward(self,x):
        x = self.fc1(x.float())
        x = F.relu(x.float())
        x = self.fc2(x.float())
        x = F.relu(x.float())
        x = self.dropout(x)
        x = self.fc3(x.float())
        x = F.relu(x.float())
        x = self.fc4(x.float())
        x = F.relu(x.float())
        #x = self.dropout(x)
        x = self.fc5(x.float())
        x = F.relu(x.float())
        x = self.fc6(x.float())
        x = F.relu(x.float())
        x = self.fc7(x.float())
        x = F.relu(x.float())
        x = self.fc8(x.float())
        return x
        

In [80]:

model = Netz()
model.cuda()
        

optimizer = optim.Adam(model.parameters(), lr = 0.0001)        
def train(epoch):
    model.train()
    batch_id = 0
    for data, target in train_data:
        data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0).cuda()
        shape = target.size()[1]
        target = target.resize(shape,1).cuda()
        optimizer.zero_grad()
        out = model(data)
        #print("Out: ", out, out.size())
        #print("Target: ", target, target.size())
        criterion = nn.MSELoss()
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        print("Train Epoche: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
            epoch, batch_id *len(data), len(train_data),
        100. * batch_id / len(train_data), loss.item()))
        batch_id +=1
        
    #random.shuffle(train_data)
        

In [81]:
for epoch in range(1,10):
    train(epoch)
















































































































































































































































































































### Total Loss werte
- Die Fahrer unter den ersten 4 halten meistens ihre Position nach 50% des rennens -> wäre besser das vorher zu sagen

In [111]:
total_results[0][904] == total_results[0][911]

False

In [110]:
total_results = test_times(test_final)
total_results

845
tensor([[2.0313]], grad_fn=<CopyBackwards>)
Output:  [[2.0312984]]
Target:  [[2.]]
Difference:  [[0.0312984]]
tensor([[3.4460]], grad_fn=<CopyBackwards>)
Output:  [[3.4460022]]
Target:  [[3.]]
Difference:  [[0.44600224]]
tensor([[13.8712]], grad_fn=<CopyBackwards>)
Output:  [[13.871241]]
Target:  [[17.]]
Difference:  [[-3.1287594]]
tensor([[11.0865]], grad_fn=<CopyBackwards>)
Output:  [[11.08646]]
Target:  [[15.]]
Difference:  [[-3.91354]]
tensor([[12.0378]], grad_fn=<CopyBackwards>)
Output:  [[12.037809]]
Target:  [[8.]]
Difference:  [[4.0378094]]
tensor([[8.5567]], grad_fn=<CopyBackwards>)
Output:  [[8.556719]]
Target:  [[11.]]
Difference:  [[-2.4432812]]
tensor([[10.2277]], grad_fn=<CopyBackwards>)
Output:  [[10.227651]]
Target:  [[14.]]
Difference:  [[-3.7723494]]
tensor([[11.2649]], grad_fn=<CopyBackwards>)
Output:  [[11.264933]]
Target:  [[16.]]
Difference:  [[-4.7350674]]
tensor([[5.0621]], grad_fn=<CopyBackwards>)
Output:  [[5.06207]]
Target:  [[5.]]
Difference:  [[0.062069

Output:  [[16.086573]]
Target:  [[22.]]
Difference:  [[-5.9134274]]
tensor([[8.4226]], grad_fn=<CopyBackwards>)
Output:  [[8.4226055]]
Target:  [[8.]]
Difference:  [[0.4226055]]
tensor([[6.7870]], grad_fn=<CopyBackwards>)
Output:  [[6.787049]]
Target:  [[9.]]
Difference:  [[-2.2129512]]
tensor([[6.5450]], grad_fn=<CopyBackwards>)
Output:  [[6.5450044]]
Target:  [[5.]]
Difference:  [[1.5450044]]
tensor([[3.5002]], grad_fn=<CopyBackwards>)
Output:  [[3.5001585]]
Target:  [[2.]]
Difference:  [[1.5001585]]
tensor([[14.7554]], grad_fn=<CopyBackwards>)
Output:  [[14.755417]]
Target:  [[15.]]
Difference:  [[-0.24458313]]
tensor([[5.8469]], grad_fn=<CopyBackwards>)
Output:  [[5.8469296]]
Target:  [[11.]]
Difference:  [[-5.1530704]]
tensor([[4.4758]], grad_fn=<CopyBackwards>)
Output:  [[4.4757695]]
Target:  [[4.]]
Difference:  [[0.47576952]]
tensor([[2.8847]], grad_fn=<CopyBackwards>)
Output:  [[2.8847427]]
Target:  [[3.]]
Difference:  [[-0.11525726]]
tensor([[12.3838]], grad_fn=<CopyBackwards>

[{845: {2.0: array([[2.0312984]], dtype=float32),
   3.0: array([[3.4460022]], dtype=float32),
   17.0: array([[13.871241]], dtype=float32),
   15.0: array([[11.08646]], dtype=float32),
   8.0: array([[12.037809]], dtype=float32),
   11.0: array([[8.556719]], dtype=float32),
   14.0: array([[10.227651]], dtype=float32),
   16.0: array([[11.264933]], dtype=float32),
   5.0: array([[5.06207]], dtype=float32),
   24.0: array([[22.33629]], dtype=float32),
   13.0: array([[13.723007]], dtype=float32),
   12.0: array([[10.458667]], dtype=float32),
   4.0: array([[2.572261]], dtype=float32),
   1.0: array([[1.1849891]], dtype=float32),
   10.0: array([[12.903986]], dtype=float32),
   9.0: array([[12.729527]], dtype=float32),
   7.0: array([[8.240048]], dtype=float32),
   6.0: array([[8.863543]], dtype=float32),
   18.0: array([[14.264586]], dtype=float32),
   19.0: array([[15.837524]], dtype=float32),
   20.0: array([[22.53565]], dtype=float32),
   21.0: array([[22.73823]], dtype=float32)},
 

In [127]:
split_by_race[845].head()

Unnamed: 0,raceId,year,grandprix_name,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructor_name,stop_binary,...,constructorId_205.0,constructorId_206.0,constructorId_207.0,constructorId_208.0,constructorId_209.0,constructorId_210.0,status_clean_DNF,status_clean_Finished,status_clean_lapped,driverId
0,845.0,2011.0,Spanish Grand Prix,1.0,4.0,96991.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
1,845.0,2011.0,Spanish Grand Prix,2.0,4.0,91189.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
2,845.0,2011.0,Spanish Grand Prix,3.0,4.0,90239.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
3,845.0,2011.0,Spanish Grand Prix,4.0,4.0,90011.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
4,845.0,2011.0,Spanish Grand Prix,5.0,4.0,90083.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0


In [125]:
import operator

sorted_results = sorted(total_results[0][845].items(), key=operator.itemgetter(1))

In [126]:
sorted_results

[(1.0, array([[1.1849891]], dtype=float32)),
 (2.0, array([[2.0312984]], dtype=float32)),
 (4.0, array([[2.572261]], dtype=float32)),
 (3.0, array([[3.4460022]], dtype=float32)),
 (5.0, array([[5.06207]], dtype=float32)),
 (7.0, array([[8.240048]], dtype=float32)),
 (11.0, array([[8.556719]], dtype=float32)),
 (6.0, array([[8.863543]], dtype=float32)),
 (14.0, array([[10.227651]], dtype=float32)),
 (12.0, array([[10.458667]], dtype=float32)),
 (15.0, array([[11.08646]], dtype=float32)),
 (16.0, array([[11.264933]], dtype=float32)),
 (8.0, array([[12.037809]], dtype=float32)),
 (9.0, array([[12.729527]], dtype=float32)),
 (10.0, array([[12.903986]], dtype=float32)),
 (13.0, array([[13.723007]], dtype=float32)),
 (17.0, array([[13.871241]], dtype=float32)),
 (18.0, array([[14.264586]], dtype=float32)),
 (19.0, array([[15.837524]], dtype=float32)),
 (24.0, array([[22.33629]], dtype=float32)),
 (20.0, array([[22.53565]], dtype=float32)),
 (21.0, array([[22.73823]], dtype=float32))]

In [104]:
def test_times(test_final):
    total = 0
    count = 0
    result_dict = {}
    help_dict = {}
    for key in test_final.keys():
        print(key)
        help_dict = {}
        for data, target in test_final[key]:
            model.eval()
            #files.listdir(path)
            data = data.cuda()
            target = torch.Tensor(target).unsqueeze(0)
            shape = target.size()[1]
            target = target.resize(shape,1).cuda()
            out = model(data).cpu()
            print(out)
            out = out.detach().numpy()
            #out = np.round(out)
            target = target.cpu()
            target = target.detach().numpy()
            #print(data)
            #print(data["driverId"])
            total += abs(out - target[0][0])
            #print("current_position: ", data[0][0].item())
            print("Output: ", out)
            print("Target: ", target)
            help_dict[target[0][0]] = out
            print("Difference: ", out - target)
            count+=1
        result_dict[key] = help_dict
    print("Total:", total)
    return result_dict

In [108]:
test_final.keys()

dict_keys([845, 904, 911, 950, 988])

In [54]:
def test():
    total = []
    for data, target in test_data:
        model.eval()
        #files.listdir(path)
        data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0)
        shape = target.size()[1]
        target = target.resize(shape,1).cuda()
        out = model(data).cpu()
        #print(out)
        out = out.detach().numpy()
        #out = np.round(out)
        target = target.cpu()
        target = target.detach().numpy()
        #print(data)
        #print(data["driverId"])
        if out <=41 :
            total.append(out)
            #total += abs(out - target[0][0])
            print("current_position: ", data[0][0].item())
            print("Output: ", out)
            print("Target: ", target)
            print("Difference: ", out - target)
            print("============================")
    print("Total:", total)
    return total


## Test with times

In [53]:
class Netz(nn.Module):
    def __init__(self):
        super(Netz,self).__init__()
        self.fc1 = nn.Linear(110, 150)
        self.fc2 = nn.Linear(150, 180)
        self.fc3 = nn.Linear(180, 190)
        self.fc4 = nn.Linear(190, 120)
        self.fc5 = nn.Linear(120, 100)
        self.fc6 = nn.Linear(100, 70)
        self.fc7 = nn.Linear(70, 30)
        self.fc8 = nn.Linear(30, 1)
        self.dropout = nn.Dropout()
        
        
    def forward(self,x):
        x = self.fc1(x.float())
        x = F.relu(x.float())
        x = self.fc2(x.float())
        x = F.relu(x.float())
        x = self.dropout(x)
        x = self.fc3(x.float())
        x = F.relu(x.float())
        x = self.fc4(x.float())
        x = F.relu(x.float())
        #x = self.dropout(x)
        x = self.fc5(x.float())
        x = F.relu(x.float())
        x = self.fc6(x.float())
        x = F.relu(x.float())
        x = self.fc7(x.float())
        x = F.relu(x.float())
        x = self.fc8(x.float())
        return x
        

In [54]:

model = Netz()
model.cuda()
        

optimizer = optim.Adam(model.parameters(), lr = 0.0001)        
def train(epoch):
    model.train()
    batch_id = 0
    for data, target in train_data:
        data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0).cuda()
        shape = target.size()[1]
        target = target.resize(shape,1).cuda()
        optimizer.zero_grad()
        out = model(data)
        #print("Out: ", out, out.size())
        #print("Target: ", target, target.size())
        criterion = nn.MSELoss()
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        print("Train Epoche: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
            epoch, batch_id *len(data), len(train_data),
        100. * batch_id / len(train_data), loss.item()))
        batch_id +=1
        

In [55]:
for epoch in range(1,7):
    train(epoch)









































































































In [56]:
result_dict = test_times()

tensor([[1.2837]], grad_fn=<CopyBackwards>)
Difference:  [[-0.31926084]]
tensor([[1.2735]], grad_fn=<CopyBackwards>)
Difference:  [[-0.34694338]]
tensor([[1.2756]], grad_fn=<CopyBackwards>)
Difference:  [[-0.3283006]]
tensor([[1.3227]], grad_fn=<CopyBackwards>)
Difference:  [[-0.30255592]]
tensor([[1.3438]], grad_fn=<CopyBackwards>)
Difference:  [[-0.25054443]]
tensor([[1.0525]], grad_fn=<CopyBackwards>)
Difference:  [[0.14678383]]
tensor([[1.2786]], grad_fn=<CopyBackwards>)
Difference:  [[-0.34367]]
tensor([[1.2398]], grad_fn=<CopyBackwards>)
Difference:  [[-0.38193464]]
tensor([[1.2548]], grad_fn=<CopyBackwards>)
Difference:  [[-0.32841027]]
tensor([[1.2766]], grad_fn=<CopyBackwards>)
Difference:  [[-0.29935277]]
tensor([[1.3232]], grad_fn=<CopyBackwards>)
Difference:  [[-0.27292764]]
tensor([[1.3229]], grad_fn=<CopyBackwards>)
Difference:  [[-0.27516162]]
tensor([[0.7461]], grad_fn=<CopyBackwards>)
Difference:  [[0.16438007]]
tensor([[1.2968]], grad_fn=<CopyBackwards>)
Difference:  

In [57]:
result_dict

{9.0: array([[1.2837045]], dtype=float32),
 12.0: array([[1.2735454]], dtype=float32),
 10.0: array([[1.2756276]], dtype=float32),
 18.0: array([[1.3227462]], dtype=float32),
 6.0: array([[1.3438375]], dtype=float32),
 20.0: array([[0.7460873]], dtype=float32),
 16.0: array([[1.2785926]], dtype=float32),
 15.0: array([[1.2397785]], dtype=float32),
 4.0: array([[1.2547697]], dtype=float32),
 3.0: array([[1.2765895]], dtype=float32),
 7.0: array([[1.3232179]], dtype=float32),
 8.0: array([[1.3228865]], dtype=float32),
 5.0: array([[1.2968102]], dtype=float32),
 17.0: array([[1.3031565]], dtype=float32),
 14.0: array([[1.2890464]], dtype=float32),
 2.0: array([[1.2784088]], dtype=float32),
 1.0: array([[1.2735944]], dtype=float32),
 11.0: array([[1.3023514]], dtype=float32),
 13.0: array([[1.2926636]], dtype=float32)}

In [58]:
import operator

sorted_results = sorted(result_dict.items(), key=operator.itemgetter(1))

In [59]:
sorted_results

[(20.0, array([[0.7460873]], dtype=float32)),
 (15.0, array([[1.2397785]], dtype=float32)),
 (4.0, array([[1.2547697]], dtype=float32)),
 (12.0, array([[1.2735454]], dtype=float32)),
 (1.0, array([[1.2735944]], dtype=float32)),
 (10.0, array([[1.2756276]], dtype=float32)),
 (3.0, array([[1.2765895]], dtype=float32)),
 (2.0, array([[1.2784088]], dtype=float32)),
 (16.0, array([[1.2785926]], dtype=float32)),
 (9.0, array([[1.2837045]], dtype=float32)),
 (14.0, array([[1.2890464]], dtype=float32)),
 (13.0, array([[1.2926636]], dtype=float32)),
 (5.0, array([[1.2968102]], dtype=float32)),
 (11.0, array([[1.3023514]], dtype=float32)),
 (17.0, array([[1.3031565]], dtype=float32)),
 (18.0, array([[1.3227462]], dtype=float32)),
 (8.0, array([[1.3228865]], dtype=float32)),
 (7.0, array([[1.3232179]], dtype=float32)),
 (6.0, array([[1.3438375]], dtype=float32))]

In [40]:
def test_times():
    total = 0
    count = 0
    result_dict = {}
    for data, target in test_data:
        model.eval()
        #files.listdir(path)
        data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0)
        shape = target.size()[1]
        target = target.resize(shape,1).cuda()
        out = model(data).cpu()
        print(out)
        out = out.detach().numpy()
        #out = np.round(out)
        target = target.cpu()
        target = target.detach().numpy()
        #print(data)
        #print(data["driverId"])
        total += abs(out - target[0][0])
        #print("current_position: ", data[0][0].item())
        #print("Output: ", out)
        #print("Target: ", target)
        result_dict[temp_y_podium[count]] = out
        print("Difference: ", out - target)
        count+=1
    print("Total:", total)
    return result_dict

# Das Funktioniert schonmal Super

In [None]:
train_data = []
nogo_columns_hannah = [#'grid',
                #'race_completion',
                'lap_position','circuitId','lap_number',
                'podium_position', 'raceId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', 'total_laps',
               'status_clean', 'constructorId',
                'total_milliseconds', 'driverId'
               'lap_in_milliseconds','year', 'stop_binary','constructorId_1.0',
                 'constructorId_3.0',
                 'constructorId_4.0',
                 'constructorId_5.0',
                 'constructorId_6.0',
                 'constructorId_9.0',
                 'constructorId_10.0',
                 'constructorId_15.0',
                 'constructorId_131.0',
                 'constructorId_164.0',
                 'constructorId_166.0',
                 'constructorId_205.0',
                 'constructorId_206.0',
                 'constructorId_207.0',
                 'constructorId_208.0',
                 'constructorId_209.0',
                 'constructorId_210.0']
nogo_columns = ['year', 'podium_position', 'raceId','lap_number','total_laps','driverId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', #'total_laps',
               #'status_clean', 'constructorId',
                'total_milliseconds',
               'lap_in_milliseconds']
for key, value in sliced_races.items():
    for did in value.driverId.unique():
        temp = value.where(value.driverId == did).dropna(how = "all")
        if list(temp["podium_position"])[0] > 30: #Top x finish positions
            pp = 1
        else:
            
            temp['sum_milliseconds_pro_lap'] = temp['sum_milliseconds_pro_lap']/60000
            temp_y = list(temp["podium_position"])#list((temp["total_milliseconds"]/60000))
            #temp_y = temp_y[0]
            cols = [col for col in temp.columns if col not in nogo_columns]
            temp_x = temp[cols]
            stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
            temp_x = temp_x.tail(1)
            temp_x['stop_binary'] = stops
            x_tensor = torch.tensor(temp_x[temp_x.columns].values)
            #temp_x = x_tensor.float()
            train_data.append((x_tensor, [temp_y[0]]))
random.shuffle(train_data)
test_data = train_data[len(train_data)-20:]
train_data = train_data[0:len(train_data)-20]


In [47]:
class Netz(nn.Module):
    def __init__(self):
        super(Netz,self).__init__()
        self.fc1 = nn.Linear(110, 150)
        self.fc2 = nn.Linear(150, 180)
        self.fc3 = nn.Linear(180, 150)
        self.fc4 = nn.Linear(150, 120)
        self.fc5 = nn.Linear(120, 100)
        self.fc6 = nn.Linear(100, 70)
        self.fc7 = nn.Linear(70, 30)
        self.fc8 = nn.Linear(30, 1)
        #self.dropout = nn.Dropout()
        
        
    def forward(self,x):
        x = self.fc1(x.float())
        x = F.relu(x.float())
        x = self.fc2(x.float())
        x = F.relu(x.float())
        #x = self.conv_dropout(x)
        x = self.fc3(x.float())
        x = F.relu(x.float())
        x = self.fc4(x.float())
        x = F.relu(x.float())
        x = self.fc5(x.float())
        x = F.relu(x.float())
        x = self.fc6(x.float())
        x = F.relu(x.float())
        x = self.fc7(x.float())
        x = F.relu(x.float())
        x = self.fc8(x.float())
        return x
        

In [57]:

model = Netz()
model.cuda()
        

optimizer = optim.Adam(model.parameters(), lr = 0.0001)        
def train(epoch):
    model.train()
    batch_id = 0
    for data, target in train_data:
        data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0)
        shape = target.size()[1]
        target = target.resize(shape,1).cuda()
        optimizer.zero_grad()
        out = model(data)
        #print("Out: ", out, out.size())
        #print("Target: ", target, target.size())
        criterion = nn.MSELoss()
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        print("Train Epoche: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
            epoch, batch_id *len(data), len(train_data),
        100. * batch_id / len(train_data), loss.item()))
        batch_id +=1
        
    random.shuffle(train_data)
        

In [62]:
for epoch in range(1,15):
    train(epoch)













































































































In [9]:
def test():
    total = 0
    for data, target in test_data:
        model.eval()
        #files.listdir(path)
        data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0)
        shape = target.size()[1]
        target = target.resize(shape,1).cuda()
        out = model(data).cpu()
        #print(out)
        out = out.detach().numpy()
        out = np.round(out)
        target = target.cpu()
        target = target.detach().numpy()
        #print(data)
        #print(data["driverId"])
        total += abs(out - target[0][0])
        print("current_position: ", data[0][0].item())
        print("Output: ", out)
        print("Target: ", target)
        print("Difference: ", out - target)
    print("Total:", total)
test()

Total: 0
