# Importing necessary libraries

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from plotly import __version__
import cufflinks as cf
import plotly.graph_objs as go 
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# One-time notebook setup for plotting.
# Initialise plotly's offline notebook mode so figures render inside the notebook
# (connected=True presumably loads plotly.js from the CDN — confirm against plotly docs).
init_notebook_mode(connected=True) 
# IPython magic: render matplotlib figures inline in cell outputs.
%matplotlib inline
# Switch cufflinks to offline mode; the DataFrame.iplot call later in this notebook relies on cufflinks.
cf.go_offline()

# importing our data

In [2]:
# Read the circuits table and drop its 'url' column in one chained expression,
# then preview the first rows.
circuits = pd.read_csv('circuits.csv').drop(columns=['url'])
circuits.head()

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130


In [3]:
# Read the driver standings table and preview its first five rows.
driver_standings = pd.read_csv(filepath_or_buffer='driver_standings.csv')
driver_standings.head(n=5)

Unnamed: 0,driverStandingsId,raceId,driverId,points,position,positionText,wins
0,1,18,1,10.0,1,1,1
1,2,18,2,8.0,2,2,0
2,3,18,3,6.0,3,3,0
3,4,18,4,5.0,4,4,0
4,5,18,5,4.0,5,5,0


In [4]:
# Read the drivers table and drop its 'url' column in one chained expression,
# then preview the first rows.
drivers = pd.read_csv('drivers.csv').drop(columns=['url'])
drivers.head()

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish


In [5]:
# Add a 'fullname' column: forename and surname joined by a single space.
drivers['fullname'] = drivers['forename'].str.cat(drivers['surname'], sep=' ')

In [17]:
# Count drivers per nationality.
# size() counts every row in each group, whereas count() on a proxy column
# such as 'code' would silently skip rows where that column is missing.
# The result has the columns 'nationality' and 'count', ordered by nationality,
# with a fresh 0..n-1 index.
z = (
    drivers
    .groupby('nationality')
    .size()
    .reset_index(name='count')
)

In [21]:
# Preview the first rows of the per-nationality counts.
z.head()

Unnamed: 0,nationality,count
0,American,157
1,American-Italian,1
2,Argentine,24
3,Argentine-Italian,1
4,Australian,17


In [30]:
# Bar chart of the ten nationalities with the most drivers.
# The top-10 slice is computed once and passed as the data frame, and every
# visual channel refers to a column by name, so the frame and the x / y /
# text / color arguments always describe the same rows. The `labels` mapping is
# keyed by column name, which also gives the legend a readable title.
top_nationalities = z.nlargest(10, 'count')
px.bar(
    data_frame=top_nationalities,
    x='nationality',
    y='count',
    text='count',
    color='nationality',
    labels={'nationality': 'Nationality', 'count': 'Total players'},
)

As we can see, Formula 1 drivers are predominantly British, American, and Italian.

# Number of races held over the years

In [32]:
# Read the races table and drop its 'url' column in one chained expression,
# then preview the first rows.
races = pd.read_csv('races.csv').drop(columns=['url'])
races.head()

Unnamed: 0,raceId,year,round,circuitId,name,date,time
0,1,2009,1,1,Australian Grand Prix,2009-03-29,06:00:00
1,2,2009,2,2,Malaysian Grand Prix,2009-04-05,09:00:00
2,3,2009,3,17,Chinese Grand Prix,2009-04-19,07:00:00
3,4,2009,4,3,Bahrain Grand Prix,2009-04-26,12:00:00
4,5,2009,5,4,Spanish Grand Prix,2009-05-10,12:00:00


In [33]:
# Races per season: count the non-null 'round' entries within each year and
# return 'year' to an ordinary column alongside that count.
race_count = (
    races
    .groupby('year')['round']
    .count()
    .reset_index()
)
race_count.head()

Unnamed: 0,year,round
0,1950,7
1,1951,8
2,1952,8
3,1953,9
4,1954,9


In [34]:
# Line chart of the per-year race count, drawn through the DataFrame.iplot accessor.
race_count.iplot(
    kind='line',
    x='year',
    y='round',
    xTitle='year',
    yTitle='no.of races occured',
)

As we can see, the number of races held per season has increased over the years, driven by growing popularity and the addition of more tracks and constructors.
There is also a drastic decline in the number of races in 2020 due to the COVID-19 pandemic.

# Tracks around the world

In [36]:
# Per-circuit tallies: after grouping by circuitId, each remaining column holds
# the number of non-null values for that circuit.
ok = (
    races
    .groupby('circuitId')
    .count()
    .reset_index()
)
ok.head()

Unnamed: 0,circuitId,raceId,year,round,name,date,time
0,1,25,25,25,25,25,25
1,2,19,19,19,19,19,19
2,3,18,18,18,18,18,18
3,4,31,31,31,31,31,31
4,5,8,8,8,8,8,8


In [37]:
# Attach circuit metadata to the per-circuit tallies. Both frames have a 'name'
# column, so the merge suffixes them; 'name_y' is the one coming from `circuits`.
circuit = ok.merge(circuits, how='inner', on='circuitId')[
    ['circuitId', 'circuitRef', 'name_y', 'location', 'country', 'lat']
]
circuit.head()

Unnamed: 0,circuitId,circuitRef,name_y,location,country,lat
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517


In [38]:
# Circuits per country: count the non-null 'location' entries within each
# country and return 'country' to an ordinary column alongside that count.
F = (
    circuit
    .groupby('country')['location']
    .count()
    .reset_index()
)
F.head()

Unnamed: 0,country,location
0,Argentina,1
1,Australia,2
2,Austria,3
3,Azerbaijan,1
4,Bahrain,1


In [39]:
# Trace specification for the choropleth: countries are matched by name and
# coloured by the per-country value held in F['location'].
data = {
    'type': 'choropleth',
    'colorscale': 'Viridis',
    'reversescale': True,
    'locations': F['country'],
    'locationmode': "country names",
    'z': F['location'],
    'text': F['country'],
    'colorbar': {'title': 'distribution'},
}

# Figure-level settings: the chart title and a map drawn without its frame.
layout = {
    'title': 'Race tracks around the world',
    'geo': {'showframe': False},
}

In [41]:
# Assemble the figure from the trace and layout dictionaries, then render it
# in the notebook; validate=False is handed to iplot as-is.
choromap = go.Figure(layout=layout, data=[data])
iplot(choromap, validate=False)