In [1]:
import numpy as np
import pandas as pd
import time
from sklearn.metrics import confusion_matrix, precision_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.neural_network import MLPClassifier

In [2]:
#Yaris verilerini okutma
df_races = pd.read_csv('data/races.csv')
df_races = df_races.drop(columns=['fp1_date','fp1_time','fp2_date','fp2_time','fp3_date','fp3_time','quali_date','quali_time','sprint_date','sprint_time','time'])
df_races = df_races.sort_values(by=['year','round'])
df_races = df_races[df_races['year'] < 2023]
df_races.reset_index(inplace = True, drop = True)
df_races.head()

Unnamed: 0,raceId,year,round,circuitId,name,date,url
0,833,1950,1,9,British Grand Prix,1950-05-13,http://en.wikipedia.org/wiki/1950_British_Gran...
1,834,1950,2,6,Monaco Grand Prix,1950-05-21,http://en.wikipedia.org/wiki/1950_Monaco_Grand...
2,835,1950,3,19,Indianapolis 500,1950-05-30,http://en.wikipedia.org/wiki/1950_Indianapolis...
3,836,1950,4,66,Swiss Grand Prix,1950-06-04,http://en.wikipedia.org/wiki/1950_Swiss_Grand_...
4,837,1950,5,13,Belgian Grand Prix,1950-06-18,http://en.wikipedia.org/wiki/1950_Belgian_Gran...


In [3]:
#Pist bilgilerini okutma
df_circuits = pd.read_csv('data/circuits.csv')
df_circuits = df_circuits.set_index('circuitId')
df_circuits.head()

Unnamed: 0_level_0,circuitRef,name,location,country,lat,lng,alt,url
circuitId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,http://en.wikipedia.org/wiki/Sepang_Internatio...
3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,http://en.wikipedia.org/wiki/Bahrain_Internati...
4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,http://en.wikipedia.org/wiki/Istanbul_Park


In [4]:
#Yaris bilgilerindeki idleri isimler ile degistirme
for i in range(len(df_races.index)):
    x = df_races.iloc[i]['circuitId']
    df_races.at[i,'circuitId'] = df_circuits.loc[x]['circuitRef']
    df_races.at[i,'lat'] = df_circuits.loc[x]['lat']
    df_races.at[i,'lng'] = df_circuits.loc[x]['lng'] 
    df_races.at[i,'country'] = df_circuits.loc[x]['country']   
    
df_races.head()

Unnamed: 0,raceId,year,round,circuitId,name,date,url,lat,lng,country
0,833,1950,1,silverstone,British Grand Prix,1950-05-13,http://en.wikipedia.org/wiki/1950_British_Gran...,52.0786,-1.01694,UK
1,834,1950,2,monaco,Monaco Grand Prix,1950-05-21,http://en.wikipedia.org/wiki/1950_Monaco_Grand...,43.7347,7.42056,Monaco
2,835,1950,3,indianapolis,Indianapolis 500,1950-05-30,http://en.wikipedia.org/wiki/1950_Indianapolis...,39.795,-86.2347,USA
3,836,1950,4,bremgarten,Swiss Grand Prix,1950-06-04,http://en.wikipedia.org/wiki/1950_Swiss_Grand_...,46.9589,7.40194,Switzerland
4,837,1950,5,spa,Belgian Grand Prix,1950-06-18,http://en.wikipedia.org/wiki/1950_Belgian_Gran...,50.4372,5.97139,Belgium


In [5]:
#Yil ve round bilgisini alma
race = pd.read_csv('data/races.csv')
race = race.sort_values(by=['year','round'])
rounds = []
for a in np.array(race.year.unique()):
    rounds.append([a, list(race[race.year == a]['round'])])
print(rounds[:5])

[[1950, [1, 2, 3, 4, 5, 6, 7]], [1951, [1, 2, 3, 4, 5, 6, 7, 8]], [1952, [1, 2, 3, 4, 5, 6, 7, 8]], [1953, [1, 2, 3, 4, 5, 6, 7, 8, 9]], [1954, [1, 2, 3, 4, 5, 6, 7, 8, 9]]]


In [6]:
#Sonuclar verisetini okutma
df_results = pd.read_csv('data/results.csv')
df_results.head()

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
0,1,18,1,1,22,1,1,1,1,10.0,58,1:34:50.616,5690616,39,2,1:27.452,218.3,1
1,2,18,2,2,3,5,2,2,2,8.0,58,+5.478,5696094,41,3,1:27.739,217.586,1
2,3,18,3,3,7,7,3,3,3,6.0,58,+8.163,5698779,41,5,1:28.090,216.719,1
3,4,18,4,4,5,11,4,4,4,5.0,58,+17.181,5707797,58,7,1:28.603,215.464,1
4,5,18,5,1,23,3,5,5,5,4.0,58,+18.014,5708630,43,1,1:27.418,218.385,1


In [7]:
#Hesaplama sirasinda kullanilmayacak sutunlari atma
df_results = df_results.drop(columns=['number','position','positionText','laps','time','rank','fastestLapTime','fastestLapSpeed','resultId', 'fastestLap'])
df_results.head()

Unnamed: 0,raceId,driverId,constructorId,grid,positionOrder,points,milliseconds,statusId
0,18,1,1,1,1,10.0,5690616,1
1,18,2,2,5,2,8.0,5696094,1
2,18,3,3,7,3,6.0,5698779,1
3,18,4,4,11,4,5.0,5707797,1
4,18,5,1,3,5,4.0,5708630,1


In [8]:
#Verisetlerinde bulunan status bilgilerini duzenlemek icin veriseti alma
df_status = pd.read_csv('data/status.csv')
df_status = df_status.set_index('statusId')
df_status.head()

Unnamed: 0_level_0,status
statusId,Unnamed: 1_level_1
1,Finished
2,Disqualified
3,Accident
4,Collision
5,Engine


In [9]:
#Constructor bilgilerini duzenlemek icin verisetini olusturma
df_cons = pd.read_csv('data/constructors.csv')
df_cons = df_cons.set_index('constructorId')
df_cons.head()

Unnamed: 0_level_0,constructorRef,name,nationality,url
constructorId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,mclaren,McLaren,British,http://en.wikipedia.org/wiki/McLaren
2,bmw_sauber,BMW Sauber,German,http://en.wikipedia.org/wiki/BMW_Sauber
3,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Pr...
4,renault,Renault,French,http://en.wikipedia.org/wiki/Renault_in_Formul...
5,toro_rosso,Toro Rosso,Italian,http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso


In [10]:
#Orijinal yaris verisetini bozmamak icin kopyalama
df_races2 = df_races.copy()
df_races2 = df_races2.set_index('raceId')
df_races2 = df_races2.sort_values(by=['raceId'])
df_races2.head()

Unnamed: 0_level_0,year,round,circuitId,name,date,url,lat,lng,country
raceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,2009,1,albert_park,Australian Grand Prix,2009-03-29,http://en.wikipedia.org/wiki/2009_Australian_G...,-37.8497,144.968,Australia
2,2009,2,sepang,Malaysian Grand Prix,2009-04-05,http://en.wikipedia.org/wiki/2009_Malaysian_Gr...,2.76083,101.738,Malaysia
3,2009,3,shanghai,Chinese Grand Prix,2009-04-19,http://en.wikipedia.org/wiki/2009_Chinese_Gran...,31.3389,121.22,China
4,2009,4,bahrain,Bahrain Grand Prix,2009-04-26,http://en.wikipedia.org/wiki/2009_Bahrain_Gran...,26.0325,50.5106,Bahrain
5,2009,5,catalunya,Spanish Grand Prix,2009-05-10,http://en.wikipedia.org/wiki/2009_Spanish_Gran...,41.57,2.26111,Spain


In [11]:
df_races = df_races.drop(columns=['raceId'])

In [12]:
#Surucu bilgilerini tutma
df_drivers = pd.read_csv('data/drivers.csv')
df_drivers = df_drivers.set_index('driverId')
df_drivers.head()

Unnamed: 0_level_0,driverRef,number,code,forename,surname,dob,nationality,url
driverId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton
2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld
3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg
4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso
5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen


In [13]:
#Sonuclar verisetini daha okunabilir hale getirmek icin diger verisetlerinden bilgilerle guncelleme
for i in range(len(df_results.index)):
    stat = df_results.iloc[i]['statusId']
    cs = df_results.iloc[i]['constructorId']
    rrac = df_results.iloc[i]['raceId']
    ddId = df_results.iloc[i]['driverId']
    
    df_results.at[i, 'statusId'] = df_status.loc[stat, 'status']
    df_results.at[i, 'constructorId'] = df_cons.loc[cs, 'constructorRef']
    
    df_results.at[i, 'year'] = df_races2.loc[rrac, 'year']
    df_results.at[i, 'round'] = df_races2.loc[rrac, 'round']
    df_results.at[i, 'circuitId'] = df_races2.loc[rrac, 'circuitId']
    df_results.at[i, 'url'] = df_races2.loc[rrac, 'url']

    df_results.at[i, 'driverId'] = df_drivers.loc[ddId, 'driverRef']
    df_results.at[i, 'date_of_birth'] = df_drivers.loc[ddId, 'dob']
    df_results.at[i, 'nationality'] = df_drivers.loc[ddId, 'nationality']

df_results.head()

Unnamed: 0,raceId,driverId,constructorId,grid,positionOrder,points,milliseconds,statusId,year,round,circuitId,url,date_of_birth,nationality
0,18,hamilton,mclaren,1,1,10.0,5690616,Finished,2008.0,1.0,albert_park,http://en.wikipedia.org/wiki/2008_Australian_G...,1985-01-07,British
1,18,heidfeld,bmw_sauber,5,2,8.0,5696094,Finished,2008.0,1.0,albert_park,http://en.wikipedia.org/wiki/2008_Australian_G...,1977-05-10,German
2,18,rosberg,williams,7,3,6.0,5698779,Finished,2008.0,1.0,albert_park,http://en.wikipedia.org/wiki/2008_Australian_G...,1985-06-27,German
3,18,alonso,renault,11,4,5.0,5707797,Finished,2008.0,1.0,albert_park,http://en.wikipedia.org/wiki/2008_Australian_G...,1981-07-29,Spanish
4,18,kovalainen,mclaren,3,5,4.0,5708630,Finished,2008.0,1.0,albert_park,http://en.wikipedia.org/wiki/2008_Australian_G...,1981-10-19,Finnish


In [14]:
#Sonuclar verisetini siralama ve duzenleme
df_results = df_results.sort_values(by=['year','round', 'positionOrder'])
df_results.reset_index(inplace = True, drop = True)
df_results.head()

Unnamed: 0,raceId,driverId,constructorId,grid,positionOrder,points,milliseconds,statusId,year,round,circuitId,url,date_of_birth,nationality
0,833,farina,alfa,1,1,9.0,8003600,Finished,1950.0,1.0,silverstone,http://en.wikipedia.org/wiki/1950_British_Gran...,1906-10-30,Italian
1,833,fagioli,alfa,2,2,6.0,8006200,Finished,1950.0,1.0,silverstone,http://en.wikipedia.org/wiki/1950_British_Gran...,1898-06-09,Italian
2,833,reg_parnell,alfa,4,3,4.0,8055600,Finished,1950.0,1.0,silverstone,http://en.wikipedia.org/wiki/1950_British_Gran...,1911-07-02,British
3,833,cabantous,lago,6,4,3.0,\N,+2 Laps,1950.0,1.0,silverstone,http://en.wikipedia.org/wiki/1950_British_Gran...,1904-10-08,French
4,833,rosier,lago,9,5,2.0,\N,+2 Laps,1950.0,1.0,silverstone,http://en.wikipedia.org/wiki/1950_British_Gran...,1905-11-05,French


In [15]:
df_results = df_results.astype({'year': 'int', 'round': 'int'})
df_results = df_results.iloc[:,[8,9,10,1,12,13,2,3,6,7,5,4,11]]
df_results.head()

Unnamed: 0,year,round,circuitId,driverId,date_of_birth,nationality,constructorId,grid,milliseconds,statusId,points,positionOrder,url
0,1950,1,silverstone,farina,1906-10-30,Italian,alfa,1,8003600,Finished,9.0,1,http://en.wikipedia.org/wiki/1950_British_Gran...
1,1950,1,silverstone,fagioli,1898-06-09,Italian,alfa,2,8006200,Finished,6.0,2,http://en.wikipedia.org/wiki/1950_British_Gran...
2,1950,1,silverstone,reg_parnell,1911-07-02,British,alfa,4,8055600,Finished,4.0,3,http://en.wikipedia.org/wiki/1950_British_Gran...
3,1950,1,silverstone,cabantous,1904-10-08,French,lago,6,\N,+2 Laps,3.0,4,http://en.wikipedia.org/wiki/1950_British_Gran...
4,1950,1,silverstone,rosier,1905-11-05,French,lago,9,\N,+2 Laps,2.0,5,http://en.wikipedia.org/wiki/1950_British_Gran...


In [16]:
#Surucu sonuclarini duzenleme
df_drivst = pd.read_csv('data/driver_standings.csv')
df_drivst = df_drivst.drop(columns=['positionText', 'driverStandingsId'])
df_drivst.head()

Unnamed: 0,raceId,driverId,points,position,wins
0,18,1,10.0,1,1
1,18,2,8.0,2,0
2,18,3,6.0,3,0
3,18,4,5.0,4,0
4,18,5,4.0,5,0


In [17]:
#Surucu sonuclarini daha anlamli hale getirme
for i in range(len(df_drivst.index)):
    ddId = df_drivst.iloc[i]['driverId']
    df_drivst.at[i, 'driverId'] = df_drivers.loc[ddId, 'driverRef']
    
    rrac = df_drivst.iloc[i]['raceId']
    df_drivst.at[i, 'year'] = df_races2.loc[rrac, 'year']
    df_drivst.at[i, 'round'] = df_races2.loc[rrac, 'round']
    
df_drivst.head()

Unnamed: 0,raceId,driverId,points,position,wins,year,round
0,18,hamilton,10.0,1,1,2008.0,1.0
1,18,heidfeld,8.0,2,0,2008.0,1.0
2,18,rosberg,6.0,3,0,2008.0,1.0
3,18,alonso,5.0,4,0,2008.0,1.0
4,18,kovalainen,4.0,5,0,2008.0,1.0


In [18]:
#Surucu sonuclarini duzenleme
df_drivst = df_drivst.astype({'year': 'int', 'round': 'int'})
df_drivst = df_drivst.iloc[:,[5,6,1,2,4,3]]
df_drivst = df_drivst.sort_values(by=['year','round', 'position'])
df_drivst.reset_index(inplace = True, drop = True)
df_drivst.head()

Unnamed: 0,year,round,driverId,points,wins,position
0,1950,1,farina,9.0,1,1
1,1950,1,fagioli,6.0,0,2
2,1950,1,reg_parnell,4.0,0,3
3,1950,1,cabantous,3.0,0,4
4,1950,1,rosier,2.0,0,5


In [19]:
#Daha sonra kullanilmak uzere skor hesaplama
def lookup (df, team, points):
    df['lookup1'] = df.year.astype(str) + df[team] + df['round'].astype(str)
    df['lookup2'] = df.year.astype(str) + df[team] + (df['round']-1).astype(str)
    new_df = df.merge(df[['lookup1', points]], how = 'left', left_on='lookup2',right_on='lookup1')
    new_df.drop(['lookup1_x', 'lookup2', 'lookup1_y'], axis = 1, inplace = True)
    new_df.rename(columns = {points+'_x': points+'_after_race', points+'_y': points}, inplace = True)
    new_df[points].fillna(0, inplace = True)
    return new_df

In [20]:
df_drivst = lookup(df_drivst, 'driverId', 'points')
df_drivst = lookup(df_drivst, 'driverId', 'wins')
df_drivst = lookup(df_drivst, 'driverId', 'position')
df_drivst.head()

Unnamed: 0,year,round,driverId,points_after_race,wins_after_race,position_after_race,points,wins,position
0,1950,1,farina,9.0,1,1,0.0,0.0,0.0
1,1950,1,fagioli,6.0,0,2,0.0,0.0,0.0
2,1950,1,reg_parnell,4.0,0,3,0.0,0.0,0.0
3,1950,1,cabantous,3.0,0,4,0.0,0.0,0.0
4,1950,1,rosier,2.0,0,5,0.0,0.0,0.0


In [21]:
#Takim sonuclarini duzenleme
df_const_st = pd.read_csv('data/constructor_standings.csv')
df_const_st = df_const_st.drop(columns=['positionText', 'constructorStandingsId'])
df_const_st.head()

Unnamed: 0,raceId,constructorId,points,position,wins
0,18,1,14.0,1,1
1,18,2,8.0,3,0
2,18,3,9.0,2,0
3,18,4,5.0,4,0
4,18,5,2.0,5,0


In [22]:
#Takim sonuclarini anlamlandirma
for i in range(len(df_const_st.index)):
    cs = df_const_st.iloc[i]['constructorId']
    df_const_st.at[i, 'constructorId'] = df_cons.loc[cs, 'constructorRef']
    
    rrac = df_const_st.iloc[i]['raceId']
    df_const_st.at[i, 'year'] = df_races2.loc[rrac, 'year']
    df_const_st.at[i, 'round'] = df_races2.loc[rrac, 'round']

df_const_st.head()

Unnamed: 0,raceId,constructorId,points,position,wins,year,round
0,18,mclaren,14.0,1,1,2008.0,1.0
1,18,bmw_sauber,8.0,3,0,2008.0,1.0
2,18,williams,9.0,2,0,2008.0,1.0
3,18,renault,5.0,4,0,2008.0,1.0
4,18,toro_rosso,2.0,5,0,2008.0,1.0


In [23]:
#Veri tipi duzeltme
df_const_st = df_const_st.astype({'year': 'int', 'round': 'int'})
df_const_st = df_const_st.iloc[:,[5,6,1,2,4,3]]
df_const_st = df_const_st.sort_values(by=['year','round', 'position'])
df_const_st.reset_index(inplace = True, drop = True)
df_const_st.head()

Unnamed: 0,year,round,constructorId,points,wins,position
0,1958,1,cooper,8.0,1,1
1,1958,1,ferrari,6.0,0,2
2,1958,1,maserati,3.0,0,3
3,1958,2,cooper,16.0,2,1
4,1958,2,ferrari,12.0,0,2


In [24]:
#Takim skorlarini ekleme
df_const_st = lookup(df_const_st, 'constructorId', 'points')
df_const_st = lookup(df_const_st, 'constructorId', 'wins')
df_const_st = lookup(df_const_st, 'constructorId', 'position')
df_const_st.head()

Unnamed: 0,year,round,constructorId,points_after_race,wins_after_race,position_after_race,points,wins,position
0,1958,1,cooper,8.0,1,1,0.0,0.0,0.0
1,1958,1,ferrari,6.0,0,2,0.0,0.0,0.0
2,1958,1,maserati,3.0,0,3,0.0,0.0,0.0
3,1958,2,cooper,16.0,2,1,8.0,1.0,1.0
4,1958,2,ferrari,12.0,0,2,6.0,0.0,2.0


In [25]:
#qualifying sonuclarini yazdirma
df_qualifying = pd.read_csv('data/qualifying_results.csv')

In [26]:
df_qualifying.head()

Unnamed: 0,grid_position,driver_name,car,qualifying_time,season,round
0,1,Keke Rosberg ROS,Williams Honda,1:34.526,1983,1
1,2,Alain Prost PRO,Renault,1:34.672,1983,1
2,3,Patrick Tambay TAM,Ferrari,1:34.758,1983,1
3,4,Nelson Piquet PIQ,Brabham BMW,1:35.114,1983,1
4,5,Derek Warwick WAR,Toleman Hart,1:35.206,1983,1


In [27]:
#Verisetlerini birlestirmek icin sutun isimlerini duzeltme
df_qualifying.rename(columns = {'grid_position': 'grid', 'season': 'year'}, inplace = True)

In [28]:
df_qualifying.head()

Unnamed: 0,grid,driver_name,car,qualifying_time,year,round
0,1,Keke Rosberg ROS,Williams Honda,1:34.526,1983,1
1,2,Alain Prost PRO,Renault,1:34.672,1983,1
2,3,Patrick Tambay TAM,Ferrari,1:34.758,1983,1
3,4,Nelson Piquet PIQ,Brabham BMW,1:35.114,1983,1
4,5,Derek Warwick WAR,Toleman Hart,1:35.206,1983,1


In [29]:
#Hava durumu bilgisi
df_weather = pd.read_csv('data/weather.csv')
df_weather.head()

Unnamed: 0,season,round,circuit_id,weather,weather_warm,weather_cold,weather_dry,weather_wet,weather_cloudy
0,1950,1,silverstone,"Sunny, Mild, Dry",0,0,1,0,0
1,1950,2,monaco,Soleggiato,1,0,0,0,0
2,1950,3,indianapolis,Rainy,0,0,0,1,0
3,1950,4,bremgarten,"Warm, dry and sunny",1,0,1,0,0
4,1950,5,spa,"Warm, dry and sunny",1,0,1,0,0


In [30]:
#Verileri birlestirma

In [31]:
df_races = df_races.iloc[:,[0,1,2,6,7,8,4,5]]
df_races.head()

Unnamed: 0,year,round,circuitId,lat,lng,country,date,url
0,1950,1,silverstone,52.0786,-1.01694,UK,1950-05-13,http://en.wikipedia.org/wiki/1950_British_Gran...
1,1950,2,monaco,43.7347,7.42056,Monaco,1950-05-21,http://en.wikipedia.org/wiki/1950_Monaco_Grand...
2,1950,3,indianapolis,39.795,-86.2347,USA,1950-05-30,http://en.wikipedia.org/wiki/1950_Indianapolis...
3,1950,4,bremgarten,46.9589,7.40194,Switzerland,1950-06-04,http://en.wikipedia.org/wiki/1950_Swiss_Grand_...
4,1950,5,spa,50.4372,5.97139,Belgium,1950-06-18,http://en.wikipedia.org/wiki/1950_Belgian_Gran...


In [32]:
#Sutun isim duzenleme
df_results.rename(columns = {'milliseconds': 'time', 'positionOrder': 'podium'}, inplace = True)
df_results.head()

Unnamed: 0,year,round,circuitId,driverId,date_of_birth,nationality,constructorId,grid,time,statusId,points,podium,url
0,1950,1,silverstone,farina,1906-10-30,Italian,alfa,1,8003600,Finished,9.0,1,http://en.wikipedia.org/wiki/1950_British_Gran...
1,1950,1,silverstone,fagioli,1898-06-09,Italian,alfa,2,8006200,Finished,6.0,2,http://en.wikipedia.org/wiki/1950_British_Gran...
2,1950,1,silverstone,reg_parnell,1911-07-02,British,alfa,4,8055600,Finished,4.0,3,http://en.wikipedia.org/wiki/1950_British_Gran...
3,1950,1,silverstone,cabantous,1904-10-08,French,lago,6,\N,+2 Laps,3.0,4,http://en.wikipedia.org/wiki/1950_British_Gran...
4,1950,1,silverstone,rosier,1905-11-05,French,lago,9,\N,+2 Laps,2.0,5,http://en.wikipedia.org/wiki/1950_British_Gran...


In [33]:
df_qualifying.head()

Unnamed: 0,grid,driver_name,car,qualifying_time,year,round
0,1,Keke Rosberg ROS,Williams Honda,1:34.526,1983,1
1,2,Alain Prost PRO,Renault,1:34.672,1983,1
2,3,Patrick Tambay TAM,Ferrari,1:34.758,1983,1
3,4,Nelson Piquet PIQ,Brabham BMW,1:35.114,1983,1
4,5,Derek Warwick WAR,Toleman Hart,1:35.206,1983,1


In [34]:
#Gereksiz sutunlari droplama ve isim duzenleme
df_drivst.drop(['points_after_race', 'wins_after_race', 'position_after_race'] ,axis = 1, inplace = True)
df_drivst.rename(columns = {'points': 'driver_points', 'wins': 'driver_wins', 'position': 'driver_standings_pos'}, inplace = True)

In [35]:
df_drivst.head()

Unnamed: 0,year,round,driverId,driver_points,driver_wins,driver_standings_pos
0,1950,1,farina,0.0,0.0,0.0
1,1950,1,fagioli,0.0,0.0,0.0
2,1950,1,reg_parnell,0.0,0.0,0.0
3,1950,1,cabantous,0.0,0.0,0.0
4,1950,1,rosier,0.0,0.0,0.0


In [36]:
df_const_st.drop(['points_after_race', 'wins_after_race', 'position_after_race'] ,axis = 1, inplace = True)
df_const_st.rename(columns = {'points': 'constructor_points', 'wins': 'constructor_wins','position': 'constructor_standings_pos'}, inplace = True)
df_const_st.head()

Unnamed: 0,year,round,constructorId,constructor_points,constructor_wins,constructor_standings_pos
0,1958,1,cooper,0.0,0.0,0.0
1,1958,1,ferrari,0.0,0.0,0.0
2,1958,1,maserati,0.0,0.0,0.0
3,1958,2,cooper,8.0,1.0,1.0
4,1958,2,ferrari,6.0,0.0,2.0


In [37]:
df_weather.rename(columns = {'season': 'year', 'circuit_id': 'circuitId'}, inplace = True)
df_weather.head()

Unnamed: 0,year,round,circuitId,weather,weather_warm,weather_cold,weather_dry,weather_wet,weather_cloudy
0,1950,1,silverstone,"Sunny, Mild, Dry",0,0,1,0,0
1,1950,2,monaco,Soleggiato,1,0,0,0,0
2,1950,3,indianapolis,Rainy,0,0,0,1,0
3,1950,4,bremgarten,"Warm, dry and sunny",1,0,1,0,0
4,1950,5,spa,"Warm, dry and sunny",1,0,1,0,0


In [38]:
#Elimizdeki her veri setini belirli sutunlara gore birlestirme
df1 = pd.merge(df_races, df_weather, how='inner', on=['year', 'round', 'circuitId']).drop(['lat', 'lng','country','weather'], axis = 1)
df2 = pd.merge(df1, df_results, how='inner', on=['year', 'round', 'circuitId', 'url']).drop(['url','points', 'statusId', 'time'], axis = 1)

df3 = pd.merge(df2, df_drivst, how='left', on=['year', 'round', 'driverId']) 
df4 = pd.merge(df3, df_const_st, how='left', on=['year', 'round', 'constructorId']) #1958'dem

final_df = pd.merge(df4, df_qualifying, how='inner', on=['year', 'round', 'grid']).drop(['driver_name', 'car'], axis = 1) #1983'den

In [39]:
#Final verisetinin gorunusu
final_df.head()

Unnamed: 0,year,round,circuitId,date,weather_warm,weather_cold,weather_dry,weather_wet,weather_cloudy,driverId,...,constructorId,grid,podium,driver_points,driver_wins,driver_standings_pos,constructor_points,constructor_wins,constructor_standings_pos,qualifying_time
0,1983,1,jacarepagua,1983-03-13,0,0,1,0,0,piquet,...,brabham,4,1,0.0,0.0,0.0,0.0,0.0,0.0,1:35.114
1,1983,1,jacarepagua,1983-03-13,0,0,1,0,0,lauda,...,mclaren,9,2,0.0,0.0,0.0,0.0,0.0,0.0,1:36.054
2,1983,1,jacarepagua,1983-03-13,0,0,1,0,0,laffite,...,williams,18,3,0.0,0.0,0.0,0.0,0.0,0.0,1:38.234
3,1983,1,jacarepagua,1983-03-13,0,0,1,0,0,tambay,...,ferrari,3,4,0.0,0.0,0.0,0.0,0.0,0.0,1:34.758
4,1983,1,jacarepagua,1983-03-13,0,0,1,0,0,surer,...,arrows,20,5,0.0,0.0,0.0,0.0,0.0,0.0,1:38.468


In [40]:
#Suruculerin yaslarini hesaplama

from dateutil.relativedelta import *

final_df['date'] = pd.to_datetime(final_df.date)
final_df['date_of_birth'] = pd.to_datetime(final_df.date_of_birth)
final_df['driver_age'] = final_df.apply(lambda x: relativedelta(x['date'], x['date_of_birth']).years, axis=1)
final_df.drop(['date', 'date_of_birth'], axis = 1, inplace = True)

In [41]:
#null degerleri doldurup droplama

for col in ['driver_points', 'driver_wins', 'driver_standings_pos', 'constructor_points', 
            'constructor_wins' , 'constructor_standings_pos']:
    final_df[col].fillna(0, inplace = True)
    final_df[col] = final_df[col].map(lambda x: int(x))
    
final_df.dropna(inplace = True )

In [42]:
#boolean formatina dondurme

for col in ['weather_warm', 'weather_cold','weather_dry', 'weather_wet', 'weather_cloudy']:
    final_df[col] = final_df[col].map(lambda x: bool(x))

In [43]:
#qualifying sureleri arasindaki farklari hesaplama

final_df['qualifying_time'] = final_df.qualifying_time.map(lambda x: 0 if str(x) == '00.000' 
                             else(float(str(x).split(':')[1]) + (60 * float(str(x).split(':')[0])) if x != 0 else 0))
final_df = final_df[final_df['qualifying_time'] != 0]
final_df.sort_values(['year', 'round', 'grid'], inplace = True)
final_df['qualifying_time_diff'] = final_df.groupby(['year', 'round']).qualifying_time.diff()
final_df['qualifying_time'] = final_df.groupby(['year', 'round']).qualifying_time_diff.cumsum().fillna(0)
final_df.drop('qualifying_time_diff', axis = 1, inplace = True)

In [44]:
#dummie degerleri hesaplama

df_dum = pd.get_dummies(final_df, columns = ['circuitId', 'nationality', 'constructorId'] )

for col in df_dum.columns:
    if 'nationality' in col and df_dum[col].sum() < 140:
        df_dum.drop(col, axis = 1, inplace = True)
        
    elif 'constructorId' in col and df_dum[col].sum() < 140:
        df_dum.drop(col, axis = 1, inplace = True)
        
    elif 'circuitId' in col and df_dum[col].sum() < 70:
        df_dum.drop(col, axis = 1, inplace = True)
    
    else:
        pass

In [45]:
df_dum.reset_index(inplace = True, drop = True)
df_dum.head()

Unnamed: 0,year,round,weather_warm,weather_cold,weather_dry,weather_wet,weather_cloudy,driverId,grid,podium,...,constructorId_minardi,constructorId_prost,constructorId_red_bull,constructorId_renault,constructorId_sauber,constructorId_team_lotus,constructorId_toro_rosso,constructorId_toyota,constructorId_tyrrell,constructorId_williams
0,1983,1,False,False,True,False,False,keke_rosberg,1,15,...,0,0,0,0,0,0,0,0,0,1
1,1983,1,False,False,True,False,False,prost,2,6,...,0,0,0,1,0,0,0,0,0,0
2,1983,1,False,False,True,False,False,tambay,3,4,...,0,0,0,0,0,0,0,0,0,0
3,1983,1,False,False,True,False,False,piquet,4,1,...,0,0,0,0,0,0,0,0,0,0
4,1983,1,False,False,True,False,False,warwick,5,7,...,0,0,0,0,0,0,0,0,0,0


In [46]:
np.set_printoptions(precision=4)

In [47]:
#Noron agina sokmadan once train verilerini duzenleme
df = df_dum.copy()
df.podium = df.podium.map(lambda x: 1 if x == 1 else 0)

train = df[df.year <2019]
X_train = train.drop(['driverId', 'podium'], axis = 1)
y_train = train.podium

scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns = X_train.columns)

In [53]:
#Modelin isabet skorunu hesaplamak icin fonksiyon
def score_calc(model):
    score = 0
    for circuit in df[df.year == 2019]['round'].unique():

        test = df[(df.year == 2019) & (df['round'] == circuit)]
        X_test = test.drop(['driverId', 'podium'], axis = 1)
        y_test = test.podium

        #scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)

        #tahmin kismi
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns = ['proba_0', 'proba_1'])
        prediction_df['actual'] = y_test.reset_index(drop = True)
        prediction_df.sort_values('proba_1', ascending = False, inplace = True)
        prediction_df.reset_index(inplace = True, drop = True)
        prediction_df['predicted'] = prediction_df.index
        prediction_df['predicted'] = prediction_df.predicted.map(lambda x: 1 if x == 0 else 0)
        print(prediction_df)

        score += precision_score(prediction_df.actual, prediction_df.predicted)

    model_score = score / df[df.year == 2019]['round'].unique().max()
    return model_score

In [49]:
#Farkli parametrelerin sonuclarini gormek icin dictionary
comparison_dict ={'model':[],
                  'params': [],
                  'score': []}

In [54]:
model = MLPClassifier(hidden_layer_sizes = (80,20,40,5), activation = 'relu', solver = 'adam', random_state = 1, max_iter=1000)
model.fit(X_train, y_train)
model_score = score_calc(model)

comparison_dict['model'].append('neural_network_classifier')
comparison_dict['params'].append(['(80,20,40,5)', 'relu', 'adam'])
comparison_dict['score'].append(model_score)

     proba_0       proba_1  actual  predicted
0   0.008605  9.913951e-01       0          1
1   0.805938  1.940618e-01       0          0
2   1.000000  3.143686e-08       0          0
3   1.000000  2.009279e-09       1          0
4   1.000000  5.125940e-12       0          0
5   1.000000  4.501636e-13       0          0
6   1.000000  9.182305e-17       0          0
7   1.000000  3.031588e-19       0          0
8   1.000000  1.728952e-28       0          0
9   1.000000  2.689367e-29       0          0
10  1.000000  2.590521e-31       0          0
11  1.000000  1.894444e-31       0          0
12  1.000000  5.154746e-35       0          0
13  1.000000  1.139827e-37       0          0
14  1.000000  5.662725e-38       0          0
15  1.000000  3.457166e-38       0          0
16  1.000000  7.946472e-50       0          0
17  1.000000  3.453355e-52       0          0
18  1.000000  2.454161e-53       0          0
19  1.000000  1.607435e-81       0          0
     proba_0        proba_1  actua

In [55]:
model = MLPClassifier(hidden_layer_sizes = (75, 25, 50, 10), activation = 'identity', solver = 'lbfgs', random_state = 1, max_iter=1000)
model.fit(X_train, y_train)
model_score = score_calc(model)

comparison_dict['model'].append('neural_network_classifier')
comparison_dict['params'].append(['(75, 25, 50, 10)', 'identity', 'lbfgs'])
comparison_dict['score'].append(model_score)

     proba_0   proba_1  actual  predicted
0   0.577098  0.422902       0          1
1   0.821656  0.178344       1          0
2   0.827274  0.172726       0          0
3   0.834147  0.165853       0          0
4   0.963219  0.036781       0          0
5   0.985795  0.014205       0          0
6   0.993740  0.006260       0          0
7   0.995607  0.004393       0          0
8   0.999204  0.000796       0          0
9   0.999209  0.000791       0          0
10  0.999374  0.000626       0          0
11  0.999407  0.000593       0          0
12  0.999575  0.000425       0          0
13  0.999708  0.000292       0          0
14  0.999717  0.000283       0          0
15  0.999762  0.000238       0          0
16  0.999776  0.000224       0          0
17  0.999949  0.000051       0          0
18  0.999963  0.000037       0          0
19  0.999990  0.000010       0          0
     proba_0   proba_1  actual  predicted
0   0.750943  0.249057       1          1
1   0.752160  0.247840       0    

In [52]:
pd.DataFrame(comparison_dict).groupby('model').head()

Unnamed: 0,model,params,score
0,neural_network_classifier,"[(80,20,40,5), relu, adam]",0.47619
1,neural_network_classifier,"[(75, 25, 50, 10), identity, lbfgs]",0.571429
