### In this notebook, I will:
- clean the dataframe containing weather information for each of the 35 destinations
- sort this data using newly derived variables
- create a new dataframe containing only the 5 best destinations
- plot a map showing the positions of these 5 cities

In [1]:
# For reference, here is the meaning of each weather code used below (together with
# other variables) to rank the destinations.
# NOTE(review): this looks like the WMO weather interpretation code table published by
# the forecast API that produced the data — confirm against that API's documentation.

# Code          Description
# 0             Clear sky
# 1, 2, 3       Mainly clear, partly cloudy, and overcast
# 45, 48        Fog and depositing rime fog
# 51, 53, 55    Drizzle: Light, moderate, and dense intensity
# 56, 57        Freezing Drizzle: Light and dense intensity
# 61, 63, 65    Rain: Slight, moderate and heavy intensity
# 66, 67        Freezing Rain: Light and heavy intensity
# 71, 73, 75    Snow fall: Slight, moderate, and heavy intensity
# 77            Snow grains
# 80, 81, 82    Rain showers: Slight, moderate, and violent
# 85, 86        Snow showers slight and heavy
# 95 *          Thunderstorm: Slight or moderate
# 96, 99 *      Thunderstorm with slight and heavy hail
# (*) marker kept from the original table; presumably a footnote about limited
#     availability of these codes — TODO confirm in the API documentation.

In [16]:
import pandas as pd
# Lift every pandas display limit so that long or wide frames are rendered in full.
for display_option in ('max_rows', 'max_columns', 'width', 'max_colwidth'):
    pd.set_option(f'display.{display_option}', None)

import plotly.express as px

In [3]:
# Load the weather table that this notebook cleans and sorts.
df = pd.read_csv(filepath_or_buffer='weather_to_sort.csv')

In [4]:
# Remove the leftover 'Unnamed: 0' column (presumably the index that was written out
# when the CSV was saved — TODO confirm).
# Rebinding `df` with errors='ignore' (instead of inplace=True) keeps this cell safe to
# re-run: the in-place version raised KeyError on a second execution because the
# column had already been removed.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [5]:
# Snapshot of the current column names as a plain list (useful to inspect their order).
# The next cell replaces `cols` with an explicit, hand-ordered list.
cols = list(df.columns)

In [6]:
# Column order used for the rest of the notebook: the three identifying columns,
# followed by (temperature, precipitation, weathercode) for each forecast day j1..j7.
cols = ['cityname', 'latitude', 'longitude'] + [
    f'{measure}_j{day}'
    for day in range(1, 8)
    for measure in ('temperature', 'precipitation', 'weathercode')
]

In [7]:
# Keep only the columns listed in `cols`, in that order.
df = df.loc[:, cols]

In [8]:
# Sanity check on the frame's dimensions (rows, columns) after the column selection.
df.shape

(35, 24)

In [9]:
# Preview the first five rows of the cleaned frame.
df.head(5)

Unnamed: 0,cityname,latitude,longitude,temperature_j1,precipitation_j1,weathercode_j1,temperature_j2,precipitation_j2,weathercode_j2,temperature_j3,precipitation_j3,weathercode_j3,temperature_j4,precipitation_j4,weathercode_j4,temperature_j5,precipitation_j5,weathercode_j5,temperature_j6,precipitation_j6,weathercode_j6,temperature_j7,precipitation_j7,weathercode_j7
0,Mont+Saint+Michel,48.635954,-1.51146,15.6,0.0,3,11.9,0.6,61,9.7,0.0,2,9.7,0.0,3,12.9,0.0,3,13.7,0.0,3,11.6,0.0,3
1,St+Malo,48.649518,-2.026041,15.5,0.0,3,10.6,1.5,61,9.1,0.0,3,9.8,0.0,3,13.1,0.0,3,13.5,0.0,3,11.5,0.1,61
2,Bayeux,49.276462,-0.702474,16.2,0.0,3,12.0,0.3,61,9.6,0.0,1,9.6,0.0,3,13.1,0.0,3,12.7,0.0,3,11.4,0.1,61
3,Le+Havre,49.493898,0.107973,15.5,0.0,2,11.7,0.5,61,9.3,0.1,3,9.4,0.0,3,12.7,0.0,3,11.2,0.0,3,11.9,0.1,3
4,Rouen,49.440459,1.093966,16.2,0.0,3,11.4,0.5,61,9.8,0.0,2,9.2,0.0,3,11.3,0.0,3,10.5,0.0,3,11.8,0.1,3


### Creating the variables I have chosen to sort my data by

In [10]:
# Build the three per-destination summary variables used for the ranking.
#
# The daily columns are selected by name prefix rather than by position
# (iloc[:, 3::3] etc.). The positional slices depended on the exact column order and,
# once the summary columns existed (positions 24, 25 and 26), also picked them up, so
# re-running the cell doubled `sum_precipitation`. Name-based selection makes the cell
# independent of column order and safe to re-run.
temperature_cols = [name for name in df.columns if name.startswith('temperature_j')]
precipitation_cols = [name for name in df.columns if name.startswith('precipitation_j')]
weathercode_cols = [name for name in df.columns if name.startswith('weathercode_j')]

df = df.assign(
    # Average temperature over the forecast days.
    mean_temp=df[temperature_cols].mean(axis=1),
    # Total precipitation over the forecast days.
    sum_precipitation=df[precipitation_cols].sum(axis=1),
    # Highest weather code reached on any forecast day (the ranking below sorts this
    # ascending, i.e. it treats a higher code as worse weather).
    top_weather_code=df[weathercode_cols].max(axis=1),
)

In [11]:
# Preview the frame with the new mean_temp / sum_precipitation / top_weather_code columns.
df.head()

Unnamed: 0,cityname,latitude,longitude,temperature_j1,precipitation_j1,weathercode_j1,temperature_j2,precipitation_j2,weathercode_j2,temperature_j3,precipitation_j3,weathercode_j3,temperature_j4,precipitation_j4,weathercode_j4,temperature_j5,precipitation_j5,weathercode_j5,temperature_j6,precipitation_j6,weathercode_j6,temperature_j7,precipitation_j7,weathercode_j7,mean_temp,sum_precipitation,top_weather_code
0,Mont+Saint+Michel,48.635954,-1.51146,15.6,0.0,3,11.9,0.6,61,9.7,0.0,2,9.7,0.0,3,12.9,0.0,3,13.7,0.0,3,11.6,0.0,3,12.157143,0.6,61
1,St+Malo,48.649518,-2.026041,15.5,0.0,3,10.6,1.5,61,9.1,0.0,3,9.8,0.0,3,13.1,0.0,3,13.5,0.0,3,11.5,0.1,61,11.871429,1.6,61
2,Bayeux,49.276462,-0.702474,16.2,0.0,3,12.0,0.3,61,9.6,0.0,1,9.6,0.0,3,13.1,0.0,3,12.7,0.0,3,11.4,0.1,61,12.085714,0.4,61
3,Le+Havre,49.493898,0.107973,15.5,0.0,2,11.7,0.5,61,9.3,0.1,3,9.4,0.0,3,12.7,0.0,3,11.2,0.0,3,11.9,0.1,3,11.671429,0.7,61
4,Rouen,49.440459,1.093966,16.2,0.0,3,11.4,0.5,61,9.8,0.0,2,9.2,0.0,3,11.3,0.0,3,10.5,0.0,3,11.8,0.1,3,11.457143,0.6,61


### Visualizing the 35 destinations

In [26]:
# Map of every destination in `df`: marker color and marker size both encode the
# mean temperature over the forecast period.
fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    color='mean_temp',
    size='mean_temp',
    color_continuous_scale=px.colors.sequential.Bluered,
    mapbox_style='open-street-map',
    zoom=4,
    width=750,
    height=500,
)
fig.show()

### Sorting the 5 best destinations

In [12]:
# List the available columns, including the three derived sort variables.
df.columns

Index(['cityname', 'latitude', 'longitude', 'temperature_j1',
       'precipitation_j1', 'weathercode_j1', 'temperature_j2',
       'precipitation_j2', 'weathercode_j2', 'temperature_j3',
       'precipitation_j3', 'weathercode_j3', 'temperature_j4',
       'precipitation_j4', 'weathercode_j4', 'temperature_j5',
       'precipitation_j5', 'weathercode_j5', 'temperature_j6',
       'precipitation_j6', 'weathercode_j6', 'temperature_j7',
       'precipitation_j7', 'weathercode_j7', 'mean_temp', 'sum_precipitation',
       'top_weather_code'],
      dtype='object')

### I have chosen to sort my data as follows:
- 1/ the lowest maximum weather code reached over the 7 days (`top_weather_code`, ascending)
- 2/ the lowest total precipitation over the 7 days (`sum_precipitation`, ascending)
- 3/ the highest mean temperature over this period (`mean_temp`, descending)

In [13]:
# Ranking criteria in priority order, each mapped to whether it is sorted ascending:
# lowest worst-case weather code first, then lowest total precipitation, then highest
# mean temperature.
ranking = {
    'top_weather_code': True,
    'sum_precipitation': True,
    'mean_temp': False,
}
df_sorted = df.sort_values(
    by=list(ranking),
    ascending=list(ranking.values()),
    ignore_index=True,
)

In [14]:
# Keep the five top-ranked rows (all columns) of the sorted frame.
best_destinations = df_sorted.head(5)
best_destinations.shape

(5, 27)

In [15]:
# Display the selected destinations; the frame holds only 5 rows, so head(10) shows all of them.
best_destinations.head(10)

Unnamed: 0,cityname,latitude,longitude,temperature_j1,precipitation_j1,weathercode_j1,temperature_j2,precipitation_j2,weathercode_j2,temperature_j3,precipitation_j3,weathercode_j3,temperature_j4,precipitation_j4,weathercode_j4,temperature_j5,precipitation_j5,weathercode_j5,temperature_j6,precipitation_j6,weathercode_j6,temperature_j7,precipitation_j7,weathercode_j7,mean_temp,sum_precipitation,top_weather_code
0,Bayonne,43.494514,-1.473666,19.3,0.0,3,21.5,0.0,3,13.5,0.0,3,13.3,0.0,1,13.7,0.0,3,16.0,0.0,0,5.1,0.0,1,14.628571,0.0,3
1,Cassis,43.214036,5.539632,16.2,0.0,0,16.6,0.0,3,15.4,0.0,3,14.5,0.0,3,14.6,0.0,0,12.4,0.0,0,10.5,0.0,2,14.314286,0.0,3
2,Biarritz,43.471144,-1.552727,18.4,0.0,3,20.7,0.0,3,13.7,0.0,2,12.9,0.0,1,13.2,0.0,3,15.5,0.0,0,4.7,0.0,1,14.157143,0.0,3
3,Marseille,43.296174,5.369953,17.5,0.0,0,17.4,0.0,3,15.4,0.0,3,14.5,0.0,3,12.7,0.0,0,11.1,0.0,0,8.5,0.0,2,13.871429,0.0,3
4,Aix+en+Provence,43.529842,5.447474,17.1,0.0,1,16.3,0.0,3,15.9,0.0,3,12.7,0.0,3,12.7,0.0,0,10.4,0.0,0,6.7,0.0,2,13.114286,0.0,3


### Saving this dataframe for the next steps

In [32]:
# NOTE: the export is currently disabled, so running this notebook does not (re)write
# the CSV. Uncomment the line below to regenerate the file for the next steps.
# best_destinations.to_csv('best_destinations_dataframe.csv', index=False)

### Visualizing the 5 best destinations

In [31]:
# Map of the five selected destinations only.
# The figure is built from `best_destinations` itself, with columns referenced by name.
# The previous version passed the full 35-row `df` as the data frame while every
# plotted series came from the 5-row `best_destinations`, mixing two data sources of
# different lengths. `hover_name` shows the city name on each marker.
fig = px.scatter_mapbox(
    best_destinations,
    lat='latitude',
    lon='longitude',
    hover_name='cityname',
    color='mean_temp',
    size='mean_temp',
    color_continuous_scale=px.colors.sequential.Oryel,
    mapbox_style='open-street-map',
    zoom=4,
    width=750,
    height=500,
)
fig.show()