In [11]:
# Clear every name from the interactive namespace without asking for confirmation.
%reset -f
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Ask the inline backend for high-resolution ("retina") figure output.
%config InlineBackend.figure_format = 'retina'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Notebook-wide display settings.
# Truncate long DataFrame reprs to 8 rows (head and tail separated by "...").
pd.options.display.max_rows = 8
# Default figure size, in inches (width, height).
plt.rcParams.update({'figure.figsize': (9, 6)})

## Create a [DataFrame](https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe)

In [12]:
# Six consecutive daily timestamps starting on 2013-01-01, used as the row index.
dates = pd.date_range(start='20130101', periods=6)

# One row per date, four columns A-D of standard-normal draws.
# The generator is not seeded, so the values differ on every run.
pd.DataFrame(
    data=np.random.randn(len(dates), 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

Unnamed: 0,A,B,C,D
2013-01-01,-0.951275,0.903052,0.089309,1.546373
2013-01-02,-0.821362,0.278894,-3.330041,-0.163082
2013-01-03,0.783076,-0.725296,0.34305,2.281231
2013-01-04,-1.059778,-2.421516,0.24483,-0.825643
2013-01-05,0.658944,1.424658,0.890757,-1.349098
2013-01-06,-0.015887,-0.564237,-0.164435,0.684723


In [13]:
# Build a DataFrame from a mapping of column name -> column content.
# Scalar entries (A, B, F) are repeated on every row; the array-like
# entries (C, D, E) each supply four values and fix the number of rows.
pd.DataFrame(dict(
    A=1.,
    B=pd.Timestamp('20130102'),
    C=pd.Series(1, index=list(range(4)), dtype='float32'),
    D=np.arange(4, dtype='int32'),
    E=pd.Categorical(["test", "train", "test", "train"]),
    F='foo',
))


Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,0,test,foo
1,1.0,2013-01-02,1.0,1,train,foo
2,1.0,2013-01-02,1.0,2,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


# Load Data from CSV File

In [14]:
# Remote CSV file with the temperature table for French cities.
url = "https://www.fun-mooc.fr/c4x/agrocampusouest/40001S03/asset/AnaDo_JeuDonnees_TemperatFrance.csv"

# The file is semicolon-separated and Latin-1 encoded (accented column names);
# its first column holds the city names and becomes the row index.
french_cities = pd.read_csv(
    url,
    sep=";",
    index_col=0,
    encoding="latin1",
)
french_cities

Unnamed: 0,Janv,Févr,Mars,Avri,Mai,Juin,juil,Août,Sept,Octo,Nove,Déce,Lati,Long,Moye,Ampl,Région
Bordeaux,5.6,6.6,10.3,12.8,15.8,19.3,20.9,21.0,18.6,13.8,9.1,6.2,44.50,-0.34,13.33,15.4,SO
Brest,6.1,5.8,7.8,9.2,11.6,14.4,15.6,16.0,14.7,12.0,9.0,7.0,48.24,-4.29,10.77,10.2,NO
Clermont,2.6,3.7,7.5,10.3,13.8,17.3,19.4,19.1,16.2,11.2,6.6,3.6,45.47,3.05,10.94,16.8,SE
Grenoble,1.5,3.2,7.7,10.6,14.5,17.8,20.1,19.5,16.7,11.4,6.5,2.3,45.10,5.43,10.98,18.6,SE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Rennes,4.8,5.3,7.9,10.1,13.1,16.2,17.9,17.8,15.7,11.6,7.8,5.4,48.05,-1.41,11.13,13.1,NO
Strasbourg,0.4,1.5,5.6,9.8,14.0,17.2,19.0,18.3,15.1,9.5,4.9,1.3,48.35,7.45,9.72,18.6,NE
Toulouse,4.7,5.6,9.2,11.6,14.9,18.7,20.9,20.9,18.3,13.3,8.6,5.5,43.36,1.26,12.68,16.2,SO
Vichy,2.4,3.4,7.1,9.9,13.6,17.1,19.3,18.8,16.0,11.0,6.6,3.4,46.08,3.26,10.72,16.9,SE


### Exercise 1

- Use `head` and `tail` to display the beginning and the end of the DataFrame.
- Set index name to "City"
- Rename the month columns of the DataFrame to their English names. The mapping from old to new column names is given as a dictionary.

In [15]:
import calendar
import locale

# Switch every locale category to the portable "C" locale so that the
# month abbreviations below come out in English.
locale.setlocale(locale.LC_ALL, 'C')

# Sequence of abbreviated month names; entry 0 is an empty string and
# entries 1..12 are Jan..Dec, hence the leading space in the printed line.
months = calendar.month_abbr
print(' '.join(months))

 Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec


### Exercise 2
- Display `Lati` column
- `.loc` and `.iloc` allow you to access individual values, slices or masked selections:
    - Display mean temperature in Rennes during September 
    - Display mean temperature in Rennes during September and December
    - Display mean temperature in Rennes from September to December
- Append a new column with the standard deviation of the temperature over the year.
- Remove this new column.
- Compute maximum value of `Mean` column.
- Compute maximum value of `Ampl` column.
- Convert the mean temperature from degrees Celsius to degrees Fahrenheit by creating a function $fahrenheit(T) = T \times 9/5 + 32$ and applying it to the `Mean` column.
- Sort the observations by `Latitude` with northern cities on top.
- Sort the observations by `Latitude` with southern cities on top.

### Exercise 3

- Keep only the month columns and transpose the DataFrame. Store the result in a new DataFrame called `city_temp` and plot it using `plot` and `boxplot`.
- Apply `describe`, `unique` and `value_counts` to the `Region` column.
- Convert the `Region` column to a categorical type.
- Use `groupby` to display cities by `Region`.