In [None]:
# Clear the interactive namespace (without prompting) so the cells below start from a clean state.
%reset -f
# Render matplotlib figures inline in the notebook, using the high-resolution 'retina' format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Limit DataFrame/Series display to 8 rows so that large tables are shown truncated.
pd.set_option("display.max_rows", 8)
# Default size (width, height) for every figure created in this notebook.
plt.rcParams['figure.figsize'] = (9, 6)

## Create a [DataFrame](https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe)

In [None]:
# Six consecutive timestamps starting on 2013-01-01, used as the row index below.
dates = pd.date_range(start='20130101', periods=6)
# 6x4 frame of standard-normal draws: one row per date, one column per letter A-D.
pd.DataFrame(
    data=np.random.standard_normal(size=(6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

In [None]:
# Build a DataFrame from a mapping of column name -> column content.
# Columns C, D and E each supply four values; A, B and F are given as scalars.
pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp('20130102'),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        D=np.arange(4, dtype='int32'),
        E=pd.Categorical(["test", "train"] * 2),
        F='foo',
    )
)


# Load Data from CSV File

In [None]:
# Remote CSV file: semicolon-separated, latin1-encoded, first column used as the row index.
url = "https://www.fun-mooc.fr/c4x/agrocampusouest/40001S03/asset/AnaDo_JeuDonnees_TemperatFrance.csv"
french_cities = pd.read_csv(
    url,
    sep=";",
    encoding="latin1",
    index_col=0,
)
# Display the loaded table.
french_cities

### Exercise 14.1

- Use `head` and `tail` to display the beginning and the end of the dataframe.
- Set the index name to "City".
- Rename the month columns of the DataFrame to English. The new column names can be defined in a dictionary.

In [None]:
import locale, calendar

# The month abbreviations below are produced according to the active locale,
# and the exercise needs them in English. The bare 'en_US' name is not
# installed on every system (many only provide 'en_US.UTF-8'), in which case
# setlocale raises locale.Error and the whole cell fails. Try the likely
# spellings in order and finish with the always-available 'C' locale, which
# also yields English abbreviations.
for _locale_name in ('en_US', 'en_US.UTF-8', 'C'):
    try:
        locale.setlocale(locale.LC_ALL, _locale_name)
        break
    except locale.Error:
        continue

# calendar.month_abbr is indexed 1..12; index 0 is an empty string, so the
# printed line starts with a blank entry.
months = calendar.month_abbr
print(*months)

### Exercise 14.2
- Display `Lati` column
- `.loc` and `.iloc` give access to individual values, slices or masked selections:
    - Display mean temperature in Rennes during September 
    - Display mean temperature in Rennes during September and December
    - Display mean temperature in Rennes from September to December
- Append a new column with the standard deviation of the temperature over the year.
- Remove this new column.
- Compute maximum value of `Mean` column.
- Compute maximum value of `Ampl` column.
- Convert the mean temperature from degrees Celsius to degrees Fahrenheit by creating a function $fahrenheit(T) = T \times 9/5 + 32$ and applying it to the column `Mean`.
- Sort the observations by `Latitude` with northern cities on top.
- Sort the observations by `Latitude` with southern cities on top.

### Exercise 14.3

- Keep only the month columns and transpose the dataframe. Create a new dataframe called `city_temp` and plot the results using `plot` and `boxplot`.
- Apply `describe`, `unique` and `value_counts` to the column `Region`.
- Convert the `Region` column to categorical type.
- Use `groupby` to display cities by `Region`.

## Transferring R data sets into Python

In [None]:
# Load the rpy2 IPython extension; the next cell relies on it for the %%R cell magic.
%load_ext rpy2.ipython

In [None]:
%%R
# R cell: attach the missMDA package and load the `orange` dataset into the R session.
library('missMDA')
data(orange)
# NOTE(review): per the missMDA documentation, estim_ncpPCA estimates the number of
# PCA components to use when imputing missing values -- confirm.
estim_ncpPCA(orange)

- Conversions of R objects to pandas objects will be done automatically.

In [None]:
from rpy2.robjects import r, pandas2ri
# Turn on rpy2's pandas conversion layer before fetching objects from R.
pandas2ri.activate()
# Same setup as in the %%R cell, this time driven from Python through the `r` interface.
r.library('missMDA')
r.data('orange')
# Evaluate the R expression `orange` and bind the result to a Python name.
orange = r('orange')

In [None]:
# Display the `orange` object fetched from R.
orange

In [None]:
from rpy2.robjects.packages import importr
# Import the R package as a Python object so that its functions can be called as attributes.
missMDA = importr("missMDA")

In [None]:
# Same call as in the %%R cell, now made from Python on the `orange` object.
missMDA.estim_ncpPCA(orange)

In [None]:
# Run imputePCA on `orange` with ncp=2.
# NOTE(review): per the missMDA documentation, ncp is the number of components used
# for the imputation -- confirm.
res = missMDA.imputePCA(orange,ncp=2)

In [None]:
# Display the first element of the imputePCA result.
# NOTE(review): presumably the completed data (`completeObs`, which the last cell
# extracts by name) -- confirm.
res[0]

In [None]:
# NOTE(review): the `rpy2` name imported here is not referenced in this cell.
import rpy2
# Wrap the first result element in a pandas DataFrame, reusing the column labels of `orange`.
# NOTE(review): no `index` is passed, so the rows get a default integer index rather than
# the row labels of `orange` -- confirm this is intended.
pd_dataframe = pd.DataFrame(np.array(res[0]),columns=orange.columns)

In [None]:
# Display the DataFrame built from the imputation result.
pd_dataframe

In [None]:
# Alternative route: run the whole imputation as R code strings and only pull the
# final data frame back into Python.
r('library(missMDA)')
r('df <- imputePCA(orange,ncp=2) ')
r('res <- as.data.frame(df$completeObs)')
# NOTE: this rebinds the Python name `orange` to the R data frame `res`, replacing
# the object assigned to `orange` earlier in the notebook.
orange = r('res')
orange