In [1]:
import pandas as pd
import numpy as np
import os

Einlesen des zuvor erzeugten Datensatzes (siehe Datenaufbereitung.ipynb)

In [6]:
df = pd.read_csv('formula1_datenv1.csv', sep = ';', decimal = '.')
del df['Unnamed: 0']
df.shape

(157755, 19)

In [7]:
df.head()

Unnamed: 0,raceId,year,circuitId,grandprix_name,driverId,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructorId,constructor_name,stop_binary,total_laps,race_completion,grid,status_clean,total_milliseconds,form
0,841.0,2011.0,1.0,Australian Grand Prix,1.0,1.0,2.0,100573.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.017241,2.0,Finished,5392556.0,0.0
1,841.0,2011.0,1.0,Australian Grand Prix,1.0,2.0,2.0,93774.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.034483,2.0,Finished,5392556.0,0.0
2,841.0,2011.0,1.0,Australian Grand Prix,1.0,3.0,2.0,92900.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.051724,2.0,Finished,5392556.0,0.0
3,841.0,2011.0,1.0,Australian Grand Prix,1.0,4.0,2.0,92582.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.068966,2.0,Finished,5392556.0,0.0
4,841.0,2011.0,1.0,Australian Grand Prix,1.0,5.0,2.0,92471.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.086207,2.0,Finished,5392556.0,0.0


Anzahl der uniquen Ausprägungen in jeder Spalte

In [8]:
for col in df.columns:
    print(col, len(df[col].unique()))

raceId 137
year 7
circuitId 26
grandprix_name 25
driverId 57
lap_number 78
lap_position 24
lap_in_milliseconds 54290
driver_fullname 57
podium_position 24
constructorId 17
constructor_name 17
stop_binary 2
total_laps 18
race_completion 948
grid 25
status_clean 3
total_milliseconds 2902
form 130


In [10]:
def hot_encode_top (column, df, feat_count = 10):
    '''
    hot one encoding, limitiert auf die feat_count häufigsten features
    eines nominalen features um zu Hohe dimensionen zu vermeiden
    
    column: liste mit einem oder mehr Spaltennamen, die hot encoded werden sollen
    df: dataframe der die Datenbasis darstellt
    feat_count: Anzahl Spalten die für jede Spalte encoded werden
    '''
    df_ = df.copy(deep = True)
    
    for col in column:
        
        
        #nur die häufigsten feat_count Featues werden encoded
        encode_features = [x for x in df_[col].value_counts(ascending = False).head(feat_count).index]
        
        for feature in encode_features:
            col_feature = col + '_'+str(feature)
            #dort wo feature nicht dem encode feature entspricht wird eine 0 gesetzt
            df_[col_feature] = df_.where(df_[col] == feature, other = 0)[col]
            #encode feature selbst wird in dataframe durch eine 1 ersetzt
            df_[col_feature].replace(feature, 1, inplace = True)
        
        #löschen der nun "bereinigten" Spalte
        del df_[col]
        
    return df_

In [11]:
encoded_df = hot_encode_top(['constructorId', 'driverId', 'total_laps','year',"podium_position"], df, 5)
encoded_df.shape

(157755, 39)

In [56]:
encoded_df.head(15)

Unnamed: 0,raceId,circuitId,grandprix_name,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructor_name,stop_binary,...,total_laps_56.0,total_laps_71.0,total_laps_53.0,total_laps_70.0,total_laps_57.0,year_2012.0,year_2016.0,year_2011.0,year_2013.0,year_2014.0
0,841.0,1.0,Australian Grand Prix,1.0,2.0,100573.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,841.0,1.0,Australian Grand Prix,2.0,2.0,93774.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,841.0,1.0,Australian Grand Prix,3.0,2.0,92900.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,841.0,1.0,Australian Grand Prix,4.0,2.0,92582.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,841.0,1.0,Australian Grand Prix,5.0,2.0,92471.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
5,841.0,1.0,Australian Grand Prix,6.0,2.0,92434.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
6,841.0,1.0,Australian Grand Prix,7.0,2.0,92447.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
7,841.0,1.0,Australian Grand Prix,8.0,2.0,92310.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8,841.0,1.0,Australian Grand Prix,9.0,2.0,92612.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
9,841.0,1.0,Australian Grand Prix,10.0,2.0,93121.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


# Hannahs zeug zum split undso

In [55]:
df = pd.read_csv('formula1_datenv1.csv', sep = ';', decimal = '.')
del df['Unnamed: 0']
df.shape

(157755, 19)

In [50]:
df.head()

Unnamed: 0,raceId,year,circuitId,grandprix_name,driverId,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructorId,constructor_name,stop_binary,total_laps,race_completion,grid,status_clean,total_milliseconds,form
0,841.0,2011.0,1.0,Australian Grand Prix,1.0,1.0,2.0,100573.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.017241,2.0,Finished,5392556.0,0.0
1,841.0,2011.0,1.0,Australian Grand Prix,1.0,2.0,2.0,93774.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.034483,2.0,Finished,5392556.0,0.0
2,841.0,2011.0,1.0,Australian Grand Prix,1.0,3.0,2.0,92900.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.051724,2.0,Finished,5392556.0,0.0
3,841.0,2011.0,1.0,Australian Grand Prix,1.0,4.0,2.0,92582.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.068966,2.0,Finished,5392556.0,0.0
4,841.0,2011.0,1.0,Australian Grand Prix,1.0,5.0,2.0,92471.0,Lewis Hamilton,2.0,1.0,McLaren,0.0,58.0,0.086207,2.0,Finished,5392556.0,0.0


# NICHT AUSFÜHREN BRAUCHT EWIG

In [56]:
drivers = df.driverId.tolist()
#Kategorische spalten in dummievariablen umwandeln
df = pd.get_dummies(df, columns=['circuitId', 'driverId', 'constructorId', 'status_clean'])
df['driverId']= drivers
#dictionary enthält einzelne datensätze zu jedem rennen
split_by_race = {}
#grenzwert ab dem vorhersage über rennausgang gemacht werden soll (orientiert sich an race_completion)
border = 0.7
#dictionary, welches renndatensätze nur bis zu einem gewissen zeitpunkt enthält (zp wird nach border gewählt)
sliced_races = {}

for rid in df['raceId'].unique():
    race = df.where(df.raceId == rid).dropna(how = 'all')
    race.reset_index(inplace = True, drop = True)
    split_by_race[rid] = race
    
    #finden der lap_number wo race_completion die angegebene border überschreitet
    last_lap_num = race.where(race.race_completion == min(race.where(race.race_completion >= border).dropna(how = 'all')['race_completion'])).dropna(how = 'all')['lap_number'].unique()[0]
    
    #es werden nur daten aus race genommen, die bis zu dieser lap_number gehen
    race_shortened = race.where(race.lap_number < last_lap_num).dropna(how = 'all')
    race_shortened.reset_index(inplace = True, drop = True)
    
    #hinzufügen einer spalte die die insgesamt gefahrenen millisekunden enthält
    #bis zu dieser Runde pro Fahrer enthält
    race_shortened['sum_milliseconds_pro_lap'] = 0
    
    for did in race_shortened.driverId.unique():
        for lapnum in race_shortened.lap_number.unique():
            sum_ms = np.sum(race_shortened.where(np.logical_and(race_shortened.driverId == did,race_shortened.lap_number<=lapnum)).dropna(how = 'all')['lap_in_milliseconds'])
            
            #setzen der bisher gefahrenen Zeit (kumuliert) pro Fahrer und Runde
            race_shortened.loc[race_shortened.where(np.logical_and(race_shortened.driverId == did,race_shortened.lap_number==lapnum)).dropna(how = 'all').index,'sum_milliseconds_pro_lap'] = sum_ms
    
    sliced_races[rid] = race_shortened
    
    
    

In [2]:
if not os.path.exists('sliced_data'):
    os.makedirs('sliced_data')
    
if not os.path.exists('split_data'):
    os.makedirs('split_data')

for key, value in sliced_races.items():
    name = 'sliced_data/sliced_'+str(int(key))+'.csv'
    value.to_csv(name,sep = ';', decimal = '.')
for key, value in split_by_race.items():
    name = 'split_data/split_'+str(int(key))+'.csv'
    value.to_csv(name,sep = ';', decimal = '.')

NameError: name 'sliced_races' is not defined

In [58]:
sliced_races[841].shape

(789, 120)

In [3]:
if os.path.exists('sliced_data'):
    csv_filenames = []
    #auslesen aller csv file dateinamen aus formula 1 datensatz und abspeichern in liste
    for filename in os.listdir(os.getcwd()+'/sliced_data'):
        typ = filename.split('.')[-1]
        name = filename.split('.')[0]
        if typ == 'csv':
            csv_filenames.append(filename)
    sliced_races = {}
    #einlesen und abspeichern als dataframe aller dateien
    for file in csv_filenames:
        try:
            df = pd.read_csv('sliced_data/'+file, engine = 'python', sep = ';', decimal = '.')
            del df['Unnamed: 0']
        except Exception as e:
            df = pd.read_csv('sliced_data/'+file, engine = 'c', sep = ';', decimal = '.')
            del df['Unnamed: 0']
            print(e)
        #print(df.head())
        f = int(file.split('_')[-1].split('.')[0])
        sliced_races[f] = df
    print('Einlesen der sliced Dateien erfolgreich')
else:
    print('Dateien können nicht eingelesen werden, da kein entsprechendes Verzeichnis existiert!')
    
if os.path.exists('split_data'):
    csv_filenames = []
    #auslesen aller csv file dateinamen aus formula 1 datensatz und abspeichern in liste
    for filename in os.listdir(os.getcwd()+'/split_data'):
        typ = filename.split('.')[-1]
        name = filename.split('.')[0]
        if typ == 'csv':
            csv_filenames.append(filename)
    split_by_race = {}
    #einlesen und abspeichern als dataframe aller dateien
    for file in csv_filenames:
        try:
            df = pd.read_csv('split_data/'+file, engine = 'python', sep = ';', decimal = '.')
            del df['Unnamed: 0']
        except Exception as e:
            df = pd.read_csv('split_data/'+file, engine = 'c', sep = ';', decimal = '.')
            del df['Unnamed: 0']
            print(e)
        #print(df.head())
        f = int(file.split('_')[-1].split('.')[0])
        split_by_race[f] = df
    print('Einlesen der split Dateien erfolgreich')
else:
    print('Dateien können nicht eingelesen werden, da kein entsprechendes Verzeichnis existiert!')

Einlesen der sliced Dateien erfolgreich
Einlesen der split Dateien erfolgreich


In [4]:
sliced_races[841].head(60)

Unnamed: 0,raceId,year,grandprix_name,lap_number,lap_position,lap_in_milliseconds,driver_fullname,podium_position,constructor_name,stop_binary,...,constructorId_206.0,constructorId_207.0,constructorId_208.0,constructorId_209.0,constructorId_210.0,status_clean_DNF,status_clean_Finished,status_clean_lapped,driverId,sum_milliseconds_pro_lap
0,841.0,2011.0,Australian Grand Prix,1.0,2.0,100573.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,100573.0
1,841.0,2011.0,Australian Grand Prix,2.0,2.0,93774.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,194347.0
2,841.0,2011.0,Australian Grand Prix,3.0,2.0,92900.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,287247.0
3,841.0,2011.0,Australian Grand Prix,4.0,2.0,92582.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,379829.0
4,841.0,2011.0,Australian Grand Prix,5.0,2.0,92471.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,472300.0
5,841.0,2011.0,Australian Grand Prix,6.0,2.0,92434.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,564734.0
6,841.0,2011.0,Australian Grand Prix,7.0,2.0,92447.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,657181.0
7,841.0,2011.0,Australian Grand Prix,8.0,2.0,92310.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,749491.0
8,841.0,2011.0,Australian Grand Prix,9.0,2.0,92612.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,842103.0
9,841.0,2011.0,Australian Grand Prix,10.0,2.0,93121.0,Lewis Hamilton,2.0,McLaren,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,935224.0


In [23]:
temp = split_by_race[841]

In [29]:
#setzen der prozentualen Grenze, ab der weiterer Verlauf des Rennens vorhergesagt werden soll
border = 0.7

for race in split_by_race.values():
    
    #finden der lap_number wo race_completion die angegebene border überschreitet
    last_lap_num = race.where(race.race_completion == min(race.where(race.race_completion >= border).dropna(how = 'all')['race_completion'])).dropna(how = 'all')['lap_number'].unique()[0]
    
    #es werden nur daten aus race genommen, die bis zu dieser lap_number gehen
    race_shortened = race.where(race.lap_number < last_lap_num).dropna(how = 'all')
    race_shortened.reset_index(inplace = True, drop = True)
    

In [24]:
temp.where(temp.race_completion == min(temp.where(temp.race_completion >= border).dropna(how = 'all')['race_completion'])).dropna(how = 'all')['lap_number'].unique()[0]

41.0

# ANN Try

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import random

In [12]:
x = temp[['year', 'circuitId', 'driverId',
       'lap_number', 'lap_position', 'lap_in_milliseconds', 'driver_fullname',
       'constructorId', 'stop_binary',
       'race_completion', 'grid', 'status_clean',
       'form']]
y = temp['podium_position']

In [25]:
encoded_df = hot_encode_top(['circuitId','constructorId', 'driverId', 'year', "status_clean"], x, 5)
encoded_df.shape

(1083, 26)

In [26]:
print(encoded_df.columns)
encoded_df.head()

Index(['raceId', 'grandprix_name', 'lap_number', 'lap_position',
       'lap_in_milliseconds', 'driver_fullname', 'constructor_name',
       'stop_binary', 'race_completion', 'grid', 'form', 'circuitId_1.0',
       'constructorId_15.0', 'constructorId_9.0', 'constructorId_6.0',
       'constructorId_1.0', 'constructorId_4.0', 'driverId_1.0',
       'driverId_155.0', 'driverId_18.0', 'driverId_808.0', 'driverId_815.0',
       'year_2011.0', 'status_clean_Finished', 'status_clean_lapped',
       'status_clean_DNF'],
      dtype='object')


Unnamed: 0,raceId,grandprix_name,lap_number,lap_position,lap_in_milliseconds,driver_fullname,constructor_name,stop_binary,race_completion,grid,...,constructorId_4.0,driverId_1.0,driverId_155.0,driverId_18.0,driverId_808.0,driverId_815.0,year_2011.0,status_clean_Finished,status_clean_lapped,status_clean_DNF
0,841.0,Australian Grand Prix,1.0,2.0,100573.0,Lewis Hamilton,McLaren,0.0,0.017241,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0
1,841.0,Australian Grand Prix,2.0,2.0,93774.0,Lewis Hamilton,McLaren,0.0,0.034483,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0
2,841.0,Australian Grand Prix,3.0,2.0,92900.0,Lewis Hamilton,McLaren,0.0,0.051724,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0
3,841.0,Australian Grand Prix,4.0,2.0,92582.0,Lewis Hamilton,McLaren,0.0,0.068966,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0
4,841.0,Australian Grand Prix,5.0,2.0,92471.0,Lewis Hamilton,McLaren,0.0,0.086207,2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1,0,0


In [27]:
x_tensor = torch.tensor(encoded_df[['lap_number', 'lap_position',
       'lap_in_milliseconds',
       'stop_binary', 'race_completion', 'grid', 'form', 'circuitId_1.0',
       'constructorId_15.0', 'constructorId_9.0', 'constructorId_6.0',
       'constructorId_1.0', 'constructorId_4.0', 'driverId_1.0',
       'driverId_155.0', 'driverId_18.0', 'driverId_808.0', 'driverId_815.0',
       'year_2011.0', 'status_clean_Finished', 'status_clean_lapped',
       'status_clean_DNF']].values)

In [32]:
x_tensor.size()

torch.Size([1083, 22])

In [43]:
lin1 = nn.Linear(22, 40)
lin1(x_tensor.float())

tensor([[ 15269.8994,  -3026.0515,  -2752.2817,  ...,  -9950.7588,
         -10450.6045, -12089.6338],
        [ 14237.4775,  -2821.4431,  -2566.3311,  ...,  -9278.1924,
          -9744.3271, -11272.1572],
        [ 14104.6475,  -2795.1162,  -2542.5476,  ...,  -9191.8955,
          -9653.7158, -11166.8906],
        ...,
        [ 14452.6826,  -2863.9412,  -2611.0034,  ...,  -9426.6699,
          -9907.6748, -11442.5918],
        [ 14586.6182,  -2890.4822,  -2635.3091,  ...,  -9514.2246,
          -9999.6348, -11548.5244],
        [ 14954.8271,  -2963.4519,  -2701.8469,  ...,  -9754.4561,
         -10251.9287, -11839.9326]], grad_fn=<AddmmBackward>)

In [28]:
train_data = []
nogo_columns = ['year', 'podium_position', 'raceId',
                'grandprix_name', 'driver_fullname',
               'constructor_name', #'total_laps',
               #'status_clean', 'constructorId',
                'total_milliseconds',
               'lap_in_milliseconds']
for key, value in sliced_races.items():
    for did in value.driverId.unique():
        temp = value.where(value.driverId == did).dropna(how = "all")
        temp_y = list((temp["total_milliseconds"]/1000)/60)
        #temp_y = temp_y[0]
        cols = [col for col in temp.columns if col not in nogo_columns]
        temp_x = temp[cols]
        stops=temp_x.sum(axis = 0)[2] #Addierte anzahl an stops bis zu dem Zeitpunkt
        temp_x = temp_x.tail(1)
        temp_x['stop_binary'] = stops
        x_tensor = torch.tensor(temp_x[temp_x.columns].values)
        #temp_x = x_tensor.float()
        train_data.append((x_tensor, [temp_y[0]]))
random.shuffle(train_data)
test_data = train_data[2400:]
train_data = train_data[0:2400]
        
        #break
        #for i, row in temp.iterrows():

In [15]:
test_data[0][1]

IndexError: list index out of range

In [272]:
cols

['lap_number',
 'lap_position',
 'stop_binary',
 'total_laps',
 'race_completion',
 'grid',
 'form',
 'circuitId_1.0',
 'circuitId_2.0',
 'circuitId_3.0',
 'circuitId_4.0',
 'circuitId_5.0',
 'circuitId_6.0',
 'circuitId_7.0',
 'circuitId_9.0',
 'circuitId_10.0',
 'circuitId_11.0',
 'circuitId_12.0',
 'circuitId_13.0',
 'circuitId_14.0',
 'circuitId_15.0',
 'circuitId_17.0',
 'circuitId_18.0',
 'circuitId_20.0',
 'circuitId_22.0',
 'circuitId_24.0',
 'circuitId_32.0',
 'circuitId_35.0',
 'circuitId_68.0',
 'circuitId_69.0',
 'circuitId_70.0',
 'circuitId_71.0',
 'circuitId_73.0',
 'driverId_1.0',
 'driverId_2.0',
 'driverId_3.0',
 'driverId_4.0',
 'driverId_5.0',
 'driverId_8.0',
 'driverId_10.0',
 'driverId_13.0',
 'driverId_15.0',
 'driverId_16.0',
 'driverId_17.0',
 'driverId_18.0',
 'driverId_20.0',
 'driverId_22.0',
 'driverId_24.0',
 'driverId_30.0',
 'driverId_37.0',
 'driverId_39.0',
 'driverId_67.0',
 'driverId_153.0',
 'driverId_154.0',
 'driverId_155.0',
 'driverId_807.0',
 

In [217]:
temp_x.sum(axis = 0)[2]

0.0

In [202]:
temp_x.columns

Index(['lap_number', 'lap_position', 'stop_binary', 'total_laps',
       'race_completion', 'grid', 'form', 'circuitId_1.0', 'circuitId_2.0',
       'circuitId_3.0',
       ...
       'constructorId_206.0', 'constructorId_207.0', 'constructorId_208.0',
       'constructorId_209.0', 'constructorId_210.0', 'status_clean_DNF',
       'status_clean_Finished', 'status_clean_lapped', 'driverId',
       'sum_milliseconds_pro_lap'],
      dtype='object', length=112)

In [86]:
train_data[0]

(tensor([[1.0000e+00, 2.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 1.0000e+00,
          1.0057e+05],
         [2.0000e+00, 2.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 1.0000e+00,
          1.9435e+05],
         [3.0000e+00, 2.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 1.0000e+00,
          2.8725e+05],
         ...,
         [3.8000e+01, 2.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 1.0000e+00,
          3.5673e+06],
         [3.9000e+01, 2.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 1.0000e+00,
          3.6576e+06],
         [4.0000e+01, 2.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 1.0000e+00,
          3.7488e+06]], dtype=torch.float64), 5392556.0)

In [37]:
class Netz(nn.Module):
    def __init__(self):
        super(Netz,self).__init__()
        self.fc1 = nn.Linear(112, 200)
        self.fc2 = nn.Linear(200, 250)
        self.fc3 = nn.Linear(250, 150)
        self.fc4 = nn.Linear(150, 100)
        self.fc5 = nn.Linear(100, 50)
        self.fc6 = nn.Linear(50, 30)
        self.fc7 = nn.Linear(30, 1)
        #self.dropout = nn.Dropout()
        
        
    def forward(self,x):
        x = self.fc1(x.float())
        x = F.tanh(x.float())
        x = self.fc2(x.float())
        x = F.tanh(x.float())
        #x = self.conv_dropout(x)
        x = self.fc3(x.float())
        x = F.tanh(x.float())
        x = self.fc4(x.float())
        x = F.tanh(x.float())
        x = self.fc5(x.float())
        x = F.tanh(x.float())
        x = self.fc6(x.float())
        x = F.tanh(x.float())
        x = self.fc7(x.float())
        return x
        

In [38]:

model = Netz()
model.cuda()
        

optimizer = optim.SGD(model.parameters(), lr = 0.0001)        
def train(epoch):
    model.train()
    batch_id = 0
    for data, target in train_data:
        data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0)
        shape = target.size()[1]
        target = target.resize(shape,1).cuda()
        optimizer.zero_grad()
        out = model(data)
        #print("Out: ", out, out.size())
        #print("Target: ", target, target.size())
        criterion = nn.MSELoss()
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        print("Train Epoche: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
            epoch, batch_id *len(data), len(train_data),
        100. * batch_id / len(train_data), loss.item()))
        batch_id +=1
        
    random.shuffle(train_data)
        

In [39]:
for epoch in range(1,2):
    train(epoch)































In [40]:
def test():
    for data, target in test_data:
        model.eval()
        #files.listdir(path)
        data = data.cuda()
        target = torch.Tensor(target).unsqueeze(0)
        shape = target.size()[1]
        target = target.resize(shape,1).cuda()
        out = model(data)
        #print(data)
        #print(data["driverId"])
        #print("Output: ", out)
        #print("Target: ", target)
        print("Difference: ", out - target)
test()

Difference:  tensor([[-9.0344]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-6.5451]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[9.0306]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-3.4593]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-4.7287]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-2.3289]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-9.4648]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-5.7713]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-3.6491]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-7.4107]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-1.6716]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[56.2749]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[5.3323]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[73.4397]], device=

Difference:  tensor([[13.8391]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[1.8974]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[16.6034]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-8.7893]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-13.4770]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-0.1213]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-9.6435]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[5.2556]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-2.4106]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[7.5145]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[20.0965]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-70.7396]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-6.3827]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-11.5628]], devic

Difference:  tensor([[-11.6254]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[12.9233]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-3.4101]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-4.6023]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[69.4086]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-10.2800]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[0.3013]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-2.7775]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[13.5551]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-4.4639]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[13.8788]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-3.7131]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[1.3030]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-25.3360]], devi

Difference:  tensor([[-9.1866]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-4.0180]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-11.3771]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[92.7856]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[10.3086]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-2.6681]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[92.2285]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-7.1206]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-8.3078]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[3.1818]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[1.2687]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[4.2172]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-70.5435]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-0.8075]], device

Difference:  tensor([[-6.5625]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[3.3527]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[85.6307]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[3.2437]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[37.9175]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[5.3531]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-35.4782]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-1.3934]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-5.2277]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-2.5585]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-6.3533]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[3.1772]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[-25.1011]], device='cuda:0', grad_fn=<SubBackward0>)
Difference:  tensor([[1.5052]], device='

In [171]:
a = torch.Tensor(temp_y).unsqueeze(0)

In [167]:
a = torch.flip(a, [1,-2])

In [189]:
a.size()

torch.Size([1, 38])

In [174]:
a.size()

torch.Size([1, 38])

In [176]:
a.resize(38,1)

tensor([[5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.],
        [5834464.]])