# DataFrame - 25/09/17

## Import des libraries

In [40]:
import pandas as pd
import numpy as np
import matplotlib as plt
pd.set_option('max_columns', 50)
%matplotlib inline

## Les séries avec la librairie "Panda"

In [41]:
# Création d'une série arbitaire
s = pd.Series([7, 'Heisenberg', 3.14, -1789710578, 'Happy Eating!'])
s

0                7
1       Heisenberg
2             3.14
3      -1789710578
4    Happy Eating!
dtype: object

In [42]:
# Création d'une série avec spécification d'index
s = pd.Series([7, 'Heisenberg', 3.14, -1789710578, 'Happy Eating!'],
             index=['A', 'B', 'C', 'Y', 'E'])
s

A                7
B       Heisenberg
C             3.14
Y      -1789710578
E    Happy Eating!
dtype: object

## Les dictionnaires avec la librarie "Panda"

In [59]:
# Création d'un dictionnaire puis, transformation du dictionnaire en série.
d = {'Chicago' : 1000, 'New York' : 1300, 'Portland' : 900, 'San Francisco' : 1100,
     'Austin' : 450, 'Boston' : None}
cities = pd.Series(d)
cities

Austin            450.0
Boston              NaN
Chicago          1000.0
New York         1300.0
Portland          900.0
San Francisco    1100.0
dtype: float64

In [60]:
# Recherche dans le dictionnaire
cities[['Chicago', 'Portland', 'San Francisco']]

Chicago          1000.0
Portland          900.0
San Francisco    1100.0
dtype: float64

# Commandes usuelles

In [61]:
# Sélectionne les villes < 1000
cities[cities < 1000]

Austin      450.0
Portland    900.0
dtype: float64

In [62]:
# Test si les valeurs sont manquantes
cities.notnull()

Austin            True
Boston           False
Chicago           True
New York          True
Portland          True
San Francisco     True
dtype: bool

In [63]:
# Afficher seulement les valeurs manquantes
cities[cities.isnull()]

Boston   NaN
dtype: float64

In [64]:
# Calcul la moyenne des valeurs
cities[~cities.isnull()].mean

<bound method Series.mean of Austin            450.0
Chicago          1000.0
New York         1300.0
Portland          900.0
San Francisco    1100.0
dtype: float64>

In [65]:
# Change la valeur NaN par la moyenne des valeurs
cities[~cities.isnull()].mean
cities[cities.isnull()] = cities.mean()
cities

Austin            450.0
Boston            950.0
Chicago          1000.0
New York         1300.0
Portland          900.0
San Francisco    1100.0
dtype: float64

In [66]:
# Calcul la médiane
cities.median()

975.0

In [67]:
# Ajout de la médiane à la valeur NaN
cities['Boston'] = None
cities[cities.isnull()].mean
cities[cities.isnull()] = cities.median()
cities

Austin            450.0
Boston           1000.0
Chicago          1000.0
New York         1300.0
Portland          900.0
San Francisco    1100.0
dtype: float64

In [68]:
# Changer les villes par les années
cities.index=[2011,2012,2013,2014,2015,2016]
cities

2011     450.0
2012    1000.0
2013    1000.0
2014    1300.0
2015     900.0
2016    1100.0
dtype: float64

In [69]:
# Calcul de l'écart-type
cities.std()

283.57832545289256

In [70]:
# Création d'un dataframe
data = {'years' : [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
       'teams' : ['Bears', 'Bears', 'Bears', 'Packers', 'Packers', 'Lions', 'Lions', 'Lions'],
       'wins' : [11, 8, 10, 15, 11, 6, 10, 4],
       'losses' : [5, 8, 6, 1, 5, 10, 6, 12]}
football = pd.DataFrame(data, columns=['years','teams','wins', 'losses'])
football

Unnamed: 0,years,teams,wins,losses
0,2010,Bears,11,5
1,2011,Bears,8,8
2,2012,Bears,10,6
3,2011,Packers,15,1
4,2012,Packers,11,5
5,2010,Lions,6,10
6,2011,Lions,10,6
7,2012,Lions,4,12


In [71]:
# Fusionner les datas football avec la série
cities_df = pd.DataFrame({'years' : cities.index, 'names' : cities.values})
football_merged = football.merge(cities_df, on = 'years')
football_merged

Unnamed: 0,years,teams,wins,losses,names
0,2011,Bears,8,8,450.0
1,2011,Packers,15,1,450.0
2,2011,Lions,10,6,450.0
3,2012,Bears,10,6,1000.0
4,2012,Packers,11,5,1000.0
5,2012,Lions,4,12,1000.0


In [72]:
football = football.set_index('years')
football

Unnamed: 0_level_0,teams,wins,losses
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010,Bears,11,5
2011,Bears,8,8
2012,Bears,10,6
2011,Packers,15,1
2012,Packers,11,5
2010,Lions,6,10
2011,Lions,10,6
2012,Lions,4,12
