# High-level introduction to <code>pandas</code>
For a more complete introduction to <code>pandas</code>, see [https://pandas.pydata.org/](https://pandas.pydata.org/).

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Let's start from a dictionary

In [20]:
# One scalar draw from each of three distributions, plus a fixed constant.
# Each key will later become an index label of a pandas Series.
data = dict(
    normal=np.random.normal(),
    poisson=np.random.poisson(),
    power=np.random.power(4),
    test=.4,
)

In [21]:
# Show the dictionary: four keys, each mapped to a single scalar value.
data

{'normal': -1.6819264871938433,
 'poisson': 0,
 'power': 0.7334288857032495,
 'test': 0.4}

In [22]:
# Build a Series from the dict: the keys become the index labels and the
# values become the entries.
s = pd.Series(data=data)

In [73]:
# Three Series of 10 standard-normal draws each.
# x and y share the labels A1..A10; z uses A2..A11, so it only partially
# overlaps with the other two.
labels = ["A{}".format(n) for n in range(1, 11)]
shifted_labels = ["A{}".format(n) for n in range(2, 12)]

x = pd.Series(np.random.normal(size=10), index=labels)
y = pd.Series(np.random.normal(size=10), index=labels)
z = pd.Series(np.random.normal(size=10), index=shifted_labels)

In [74]:
# Show x: 10 float values labelled A1..A10.
x

A1    -1.481735
A2    -0.530766
A3     0.741117
A4    -0.236254
A5     0.324146
A6     0.065857
A7     1.101097
A8    -2.149840
A9    -0.112824
A10   -1.435166
dtype: float64

In [75]:
# Show z: its labels run A2..A11, i.e. shifted by one with respect to x.
z

A2    -0.665768
A3    -1.137023
A4    -0.944024
A5     1.480276
A6     1.866718
A7    -0.087496
A8     0.082559
A9    -2.368238
A10   -0.849144
A11   -1.323347
dtype: float64

## DataFrame

In [82]:
# Stack the three Series as rows named K, J and N, then transpose so that each
# Series becomes a column and the A* labels become the row index.
# Labels that a Series does not have are left as NaN in its column.
df = pd.DataFrame(
    [x, y, z],
    index=['K', 'J', 'N'],
).transpose()

In [85]:
# Replace every missing value with the mean of its own column.
# The filled frame is only displayed: df itself is not modified.
column_means = df.mean()
df.fillna(column_means)

Unnamed: 0,K,J,N
A1,-1.481735,0.34456,-0.394549
A2,-0.530766,0.042843,-0.665768
A3,0.741117,-0.029433,-1.137023
A4,-0.236254,1.6091,-0.944024
A5,0.324146,0.41094,1.480276
A6,0.065857,0.407629,1.866718
A7,1.101097,1.249623,-0.087496
A8,-2.14984,0.93575,0.082559
A9,-0.112824,0.118366,-2.368238
A10,-1.435166,-0.154789,-0.849144


## Create from a list of dictionaries

In [87]:
# Explicit-loop version: build four independent records, one per iteration,
# and collect them in a list.
data_list = []
for i in range(4):
    # Fill the record key by key; insertion order defines the column order later.
    d = {}
    d['normal'] = np.random.normal()
    d['poisson'] = np.random.poisson()
    d['power'] = np.random.power(4)
    d['test'] = .4
    data_list.append(d)

{'normal': 0.4968019506553213, 'poisson': 0, 'power': 0.7736863493930786, 'test': 0.4}


In [88]:
# Same four records as the explicit loop, written as a list comprehension.
data_list = [
    dict(
        normal=np.random.normal(),
        poisson=np.random.poisson(),
        power=np.random.power(4),
        test=.4,
    )
    for _ in range(4)
]

In [91]:
# One row per record in data_list, labelled "Esperimento 1", "Esperimento 2", ...
# The dictionary keys become the column names.
experiment_labels = [
    "Esperimento {}".format(n) for n in range(1, len(data_list) + 1)
]
df = pd.DataFrame(data_list, index=experiment_labels)

In [97]:
# Show the frame: one row per experiment, one column per dictionary key.
df

Unnamed: 0,normal,poisson,power,test
Esperimento 1,-1.508587,0,0.9272,0.4
Esperimento 2,0.317827,0,0.8436,0.4
Esperimento 3,0.033654,1,0.985012,0.4
Esperimento 4,-0.106855,1,0.704291,0.4


In [96]:
# Pick two rows by label and two columns by name in a single .loc call
# (row labels first, column labels second). This gives the same result as
# chaining two [] lookups, without building an intermediate frame.
df.loc[['Esperimento 2', 'Esperimento 3'], ['power', 'test']]

Unnamed: 0,power,test
Esperimento 2,0.8436,0.4
Esperimento 3,0.985012,0.4


## Selection

In [103]:
# Boolean mask with one True/False per row; indexing with it keeps only the
# rows whose 'normal' value is strictly positive.
is_positive = df['normal'] > 0
df[is_positive]

Unnamed: 0,normal,poisson,power,test
Esperimento 2,0.317827,0,0.8436,0.4
Esperimento 3,0.033654,1,0.985012,0.4


In [107]:
# Keep the rows where power > 0.8 OR normal > 0. Each comparison needs its own
# parentheses because | binds more tightly than >.
mask = (df['power'] > 0.8) | (df['normal'] > 0)

# Filter rows and pick columns in one .loc call instead of chaining [] lookups.
df.loc[mask, ['poisson', 'normal']]

Unnamed: 0,poisson,normal
Esperimento 1,0,-1.508587
Esperimento 2,0,0.317827
Esperimento 3,1,0.033654


## From a dict of dicts

In [108]:
# Dict of dicts: the outer keys identify a person, and each inner dict holds
# that person's car fields ('targa' and 'marca').
auto = {
    'persona 1': dict(targa='XYZ456', marca='fiat'),
    'persona 2': dict(targa='KJZ456', marca='bmw'),
    'persona 3': dict(targa='OKZ456', marca='audi'),
}

In [111]:
# DataFrame(auto) puts the outer keys ('persona N') on the columns, so
# transpose to get one row per person and one column per inner key.
df = pd.DataFrame(auto).transpose()

In [112]:
# Show the frame: one row per person, with columns 'targa' and 'marca'.
df

Unnamed: 0,targa,marca
persona 1,XYZ456,fiat
persona 2,KJZ456,bmw
persona 3,OKZ456,audi


# Data interaction (files, SQL, etc.)