# Using pandas

In [1]:
import pandas as pd
import numpy as np

## Series

A Series is basically a single column of a spreadsheet (a DataFrame).
Let's create some series:

In [4]:
# Two small sample lists: the odd digits and the even digits below 10.
mydata = list(range(1, 10, 2))
mydata2 = list(range(0, 10, 2))
# No index is supplied, so pandas labels the values 0..n-1.
myser = pd.Series(mydata)

In [6]:
# Show both source lists, one per line.
print(mydata, mydata2, sep="\n")

[1, 3, 5, 7, 9]
[0, 2, 4, 6, 8]


In [7]:
# Display the Series built from mydata (labels, values and dtype).
print(myser)

0    1
1    3
2    5
3    7
4    9
dtype: int64


In [8]:
# A Series can also be built from a NumPy array instead of a plain list.
myserarray = np.asarray(mydata2)
myser2 = pd.Series(data=myserarray)
print(myser2)

0    0
1    2
2    4
3    6
4    8
dtype: int32


In [10]:
# Attach explicit labels: one index entry per value in myserarray.
myindex = ['honda', 'ford', 'opel', 'fiat', 'lux']
myser3 = pd.Series(data=myserarray, index=myindex)
print(myser3)

honda    0
ford     2
opel     4
fiat     6
lux      8
dtype: int32


### Accessing elements

In [11]:
# Look a single value up by its index label.
print(myser3.loc['fiat'])

6


### Do math element-wise

In [12]:
# Multiplying by a scalar is applied to every element of the Series.
print(myser3.mul(3.0))

honda     0.0
ford      6.0
opel     12.0
fiat     18.0
lux      24.0
dtype: float64


## Creating DataFrames by combining series

In [24]:
# Two Series that share the same labels but list them in a different order.
ser1 = pd.Series([1, 2.3, 4.5, 6.7], index=['opel', 'ford', 'fiat', 'lux'])
ser2 = pd.Series([3.4, 3.2, 3.3, 2.2], index=['opel', 'fiat', 'ford', 'lux'])
# Concatenating along the column axis lines the values up by label, not by
# position; the unnamed Series become columns 0 and 1.
myframe = pd.concat([ser1, ser2], axis="columns")
print(myframe)

        0    1
opel  1.0  3.4
ford  2.3  3.3
fiat  4.5  3.2
lux   6.7  2.2


In [30]:
# Each column of the frame comes back as a Series named after its label.
for column_label in (0, 1):
    print(myframe[column_label])

opel    1.0
ford    2.3
fiat    4.5
lux     6.7
Name: 0, dtype: float64
opel    3.4
ford    3.3
fiat    3.2
lux     2.2
Name: 1, dtype: float64


## Creating DataFrames with random data

In [32]:
# Draw from a privately seeded generator so that "Restart & Run All" rebuilds
# exactly the same frame every time; the global np.random state is untouched.
# RandomState.rand samples uniformly from [0, 1).
uniform_rng = np.random.RandomState(0)
myframe2 = pd.DataFrame(uniform_rng.rand(5, 5))
# No index/columns are given, so both axes get the default labels 0..4.
print(myframe2)

          0         1         2         3         4
0  0.074801  0.772561  0.851184  0.391396  0.846016
1  0.268100  0.202687  0.671945  0.602477  0.810833
2  0.418786  0.458297  0.060113  0.211585  0.018091
3  0.434126  0.207649  0.694073  0.019169  0.875566
4  0.231700  0.070585  0.824286  0.791974  0.455876


In [33]:
# Draw from a privately seeded generator so the frame (and the pickle written
# from it later) is identical on every run. RandomState.randn samples from the
# standard normal distribution.
normal_rng = np.random.RandomState(1)
myframe3 = pd.DataFrame(
    data=normal_rng.randn(5, 5),
    index=['day1', 'day2', 'day3', 'day4', 'day5'],      # row labels
    columns=['fiat', 'opel', 'ford', 'toyota', 'lux'],   # column labels
)
print(myframe3)

          fiat      opel      ford    toyota       lux
day1  0.383862  1.184795  1.233391 -1.409819 -0.175223
day2 -0.995241 -0.171044  0.337925 -1.386632 -0.357476
day3 -0.972147  1.091055 -0.840739  0.258189  0.885440
day4  0.709033 -0.212156  0.605999  0.993640 -0.805394
day5  0.036126 -0.335380  0.783355 -0.588851 -0.434726


In [34]:
# Save the dataframe to disk in pickle format.
myframe3.to_pickle(path="dataframe.pd")