# Pandas Basics

Date: 2024/08/08

しばらくFlaskのプログラミングばかりやっていたので、Pandasのスキルが落ちてしまった。ここで基本を復習する。

## SeriesからDataFrame作成

In [1]:
import pandas as pd

s1 = pd.Series([0, 1, 2, 3], index=['A', 'B', 'C', 'D'])
s1

A    0
B    1
C    2
D    3
dtype: int64

In [2]:
s2 = pd.Series([4, 5, 6, 7], index=['A', 'B', 'C', 'D'], name='Y')
s2

A    4
B    5
C    6
D    7
Name: Y, dtype: int64

In [3]:
df = s2.to_frame('X')
df['Y'] = s1
df

Unnamed: 0,X,Y
A,4,0
B,5,1
C,6,2
D,7,3


In [4]:
pd.DataFrame([s2])

Unnamed: 0,A,B,C,D
Y,4,5,6,7


## 複数のSeriesからDataFrame作成

In [5]:
pd.DataFrame({'XX': s1, 'YY': s2})

Unnamed: 0,XX,YY
A,0,4
B,1,5
C,2,6
D,3,7


## Index操作

In [6]:
df = pd.DataFrame({'col0': [0, 1, 2, 3], 'col1': [4, 5, 6, 7], 'col2': [8, 9, 10, 11]}, index=['row0', 'row1', 'row2', 'row3'])
df

Unnamed: 0,col0,col1,col2
row0,0,4,8
row1,1,5,9
row2,2,6,10
row3,3,7,11


In [7]:
df.col0

row0    0
row1    1
row2    2
row3    3
Name: col0, dtype: int64

In [8]:
df.loc['row1':'row2','col2']

row1     9
row2    10
Name: col2, dtype: int64

In [9]:
df.iloc[1:3,2]

row1     9
row2    10
Name: col2, dtype: int64

## dtypes

In [10]:
df = pd.DataFrame({'col0': [0, 'John', 2, 3.3], 'col1': [4, 5, 'Paul', 7], 'col2': ['Geroge', 'Ringo', 10, 11], 'col3': [10,11,12,13]}, index=['row0', 'row1', 'row2', 'row3'])
df

Unnamed: 0,col0,col1,col2,col3
row0,0,4,Geroge,10
row1,John,5,Ringo,11
row2,2,Paul,10,12
row3,3.3,7,11,13


In [11]:
df.dtypes

col0    object
col1    object
col2    object
col3     int64
dtype: object

In [12]:
df.loc['row1', 'col0'] = 4.0
df.loc['row2', 'col1'] = 4
df.loc['row0', 'col2'] = 6
df.loc['row1', 'col2'] = 5

In [13]:
df

Unnamed: 0,col0,col1,col2,col3
row0,0.0,4,6,10
row1,4.0,5,5,11
row2,2.0,4,10,12
row3,3.3,7,11,13


In [14]:
df.col0 = df.col0.astype(float)
df.col1 = df.col1.astype(int)
df.col2 = df.col2.astype(int)
df.dtypes

col0    float64
col1      int64
col2      int64
col3      int64
dtype: object

## 行列計算

In [15]:
df

Unnamed: 0,col0,col1,col2,col3
row0,0.0,4,6,10
row1,4.0,5,5,11
row2,2.0,4,10,12
row3,3.3,7,11,13


In [16]:
df1 = df.astype(float)
df1

Unnamed: 0,col0,col1,col2,col3
row0,0.0,4.0,6.0,10.0
row1,4.0,5.0,5.0,11.0
row2,2.0,4.0,10.0,12.0
row3,3.3,7.0,11.0,13.0


In [17]:
df2 = df1.copy()
df2.loc['row0', 'col0'] = 9.5
df2.loc['row1', 'col2'] = -3.5
df2

Unnamed: 0,col0,col1,col2,col3
row0,9.5,4.0,6.0,10.0
row1,4.0,5.0,-3.5,11.0
row2,2.0,4.0,10.0,12.0
row3,3.3,7.0,11.0,13.0


In [18]:
df1 * df2

Unnamed: 0,col0,col1,col2,col3
row0,0.0,16.0,36.0,100.0
row1,16.0,25.0,-17.5,121.0
row2,4.0,16.0,100.0,144.0
row3,10.89,49.0,121.0,169.0


In [19]:
df1 + df2

Unnamed: 0,col0,col1,col2,col3
row0,9.5,8.0,12.0,20.0
row1,8.0,10.0,1.5,22.0
row2,4.0,8.0,20.0,24.0
row3,6.6,14.0,22.0,26.0


In [20]:
df1['col4'] = df1['col0'] * df1['col3']
df1

Unnamed: 0,col0,col1,col2,col3,col4
row0,0.0,4.0,6.0,10.0,0.0
row1,4.0,5.0,5.0,11.0,44.0
row2,2.0,4.0,10.0,12.0,24.0
row3,3.3,7.0,11.0,13.0,42.9


In [21]:
# データ分析をしていると、この種の計算（複数の列それぞれに同じSeriesを行ごとに掛ける）が多い。
# mul(..., axis=0) を使えば、ループしなくても一発で計算できる。

df1.loc[:,'col2':'col3'].mul(df1['col4'], axis=0)

Unnamed: 0,col2,col3
row0,0.0,0.0
row1,220.0,484.0
row2,240.0,288.0
row3,471.9,557.7
