Pandas
======

* fast,
* powerful,
* flexible,
* "easy to use"

data analysis and manipulation tool.

Tutorials
--------
* <https://pandas.pydata.org/docs/getting_started/index.html#getting-started>


In Pandas, a data table is called a DataFrame.

<img src="https://pandas.pydata.org/docs/_images/01_table_dataframe.svg"/>

In [1]:
import pandas as pd

## Operazioni Base

In [2]:
# Build a small DataFrame column by column: every dictionary key becomes a
# column label and its list holds the values of that column (a "Series").
people_columns = {
    "Name": [
        "Braund, Mr. Owen Harris",
        "Rossi, Sig.ra Miriana",
        "Bergerac, M.me Justine",
        "Wolf, Herr Gustav",
        "Consonni, Sig.ra Susanna",
    ],
    "Age": [22, 35, 58, 34, 56],
    "Sex": ["male", "female", "female", "male", "female"],
    "Citizenship": ["English", "Italian", "French", "German", "Italian"],
}
data = pd.DataFrame(people_columns)

data

Unnamed: 0,Name,Age,Sex,Citizenship
0,"Braund, Mr. Owen Harris",22,male,English
1,"Rossi, Sig.ra Miriana",35,female,Italian
2,"Bergerac, M.me Justine",58,female,French
3,"Wolf, Herr Gustav",34,male,German
4,"Consonni, Sig.ra Susanna",56,female,Italian


In [3]:
# The same kind of DataFrame can also be built row by row: each inner list is
# one record, and the column labels are passed separately.
column_labels = ["Name", "Age", "Sex", "Citizenship"]
people_rows = [
    ["Braund, Mr. Owen Harris", 22, "male", "English"],
    ["Rossi, Sig.ra Miriana", 35, "female", "Italian"],
    ["Wolf, Herr Gustav", 34, "male", "German"],
    ["Bergerac, M.me Justine", 58, "female", "French"],
    ["Consonni, Sig.ra Susanna", 56, "female", "Italian"],
]
data = pd.DataFrame(people_rows, columns=column_labels)

data

Unnamed: 0,Name,Age,Sex,Citizenship
0,"Braund, Mr. Owen Harris",22,male,English
1,"Rossi, Sig.ra Miriana",35,female,Italian
2,"Wolf, Herr Gustav",34,male,German
3,"Bergerac, M.me Justine",58,female,French
4,"Consonni, Sig.ra Susanna",56,female,Italian


In [4]:
# Selecting a single column with [] returns what pandas jargon calls a "Series".
age = data["Age"]
# Show the values first, then the Python type of the extracted object.
print(age, type(age), sep="\n")

0    22
1    35
2    34
3    58
4    56
Name: Age, dtype: int64
<class 'pandas.core.series.Series'>


In [5]:
# Largest value in the Age series
age.max()

58

In [6]:
# Smallest value in the Age series
age.min()

22

In [7]:
# Arithmetic mean of the ages
age.mean()

41.0

In [8]:
# Sample standard deviation of the ages (pandas normalizes by N-1 by default)
age.std()

15.491933384829668

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max); only the
# numeric column Age shows up in the result
data.describe()

Unnamed: 0,Age
count,5.0
mean,41.0
std,15.491933
min,22.0
25%,34.0
50%,35.0
75%,56.0
max,58.0


## Rimozione

### Drop

In [10]:
# Small demo frame: three rows, numeric columns "a", "b", "c" and a text column "d".
tmp = pd.DataFrame(
    [
        [1, 2, 3, "Hello"],
        [10, 20, 30, "Data"],
        [100, 200, 300, "Science"],
    ],
    columns=["a", "b", "c", "d"],
)
tmp

Unnamed: 0,a,b,c,d
0,1,2,3,Hello
1,10,20,30,Data
2,100,200,300,Science


In [11]:
# Drop the row whose index label is 1; drop() returns a new frame and
# leaves tmp itself unchanged.
tmp.drop(index=[1])

Unnamed: 0,a,b,c,d
0,1,2,3,Hello
2,100,200,300,Science


In [23]:
# Drop the columns "b" and "c" (same as passing the labels with axis=1)
tmp.drop(columns=["b", "c"])

Unnamed: 0,a,d
0,1,Hello
1,10,Data
2,100,Science


### Dropna

In [21]:
# Demo frame with two missing values: None in the text column "d" (row 0)
# and None in the numeric column "b" (row 2), which makes "b" a float column.
tmp = pd.DataFrame(
    [
        [1, 2, 3, None],
        [10, 20, 30, "Data"],
        [100, None, 300, "Science"],
    ],
    columns=["a", "b", "c", "d"],
)
tmp


Unnamed: 0,a,b,c,d
0,1,2.0,3,
1,10,20.0,30,Data
2,100,,300,Science


In [22]:
# Keep only the rows that have no missing value in any column
tmp.dropna()

Unnamed: 0,a,b,c,d
1,10,20.0,30,Data


## Selezione

In [25]:
# iloc selects rows by integer position; a single position comes back as a
# Series whose index holds the column labels.

# 1st row: label, content, type of the selection, then an empty separator line
sel = data.iloc[0]
print("row at index 0", sel, type(sel), "", sep="\n")

# 2nd row, followed by an empty separator line
print("row at index 1", data.iloc[1], "", sep="\n")

# 3rd row
print("row at index 2", data.iloc[2], sep="\n")


row at index 0
Name           Braund, Mr. Owen Harris
Age                                 22
Sex                               male
Citizenship                    English
Name: 0, dtype: object
<class 'pandas.core.series.Series'>

row at index 1
Name           Rossi, Sig.ra Miriana
Age                               35
Sex                           female
Citizenship                  Italian
Name: 1, dtype: object

row at index 2
Name           Wolf, Herr Gustav
Age                           34
Sex                         male
Citizenship               German
Name: 2, dtype: object


In [15]:

# last row: shape is (number of rows, number of columns), and position -1
# counts from the end, i.e. it is the row at position shape[0] - 1
print(data.shape)
print(data.iloc[-1])



(5, 4)
Name           Consonni, Sig.ra Susanna
Age                                  56
Sex                              female
Citizenship                     Italian
Name: 4, dtype: object


In [16]:
# rows at positions 1 and 2: the slice is half-open, i.e. the interval [1, 3)
print(data.iloc[1:3])


                    Name  Age     Sex Citizenship
1  Rossi, Sig.ra Miriana   35  female     Italian
2      Wolf, Herr Gustav   34    male      German


In [17]:
# rows at the integer positions listed explicitly (here 0, 2 and 4)
print(data.iloc[ [0, 2, 4] ])

                       Name  Age     Sex Citizenship
0   Braund, Mr. Owen Harris   22    male     English
2         Wolf, Herr Gustav   34    male      German
4  Consonni, Sig.ra Susanna   56  female     Italian
