In [1]:
import pandas as pd

# DataFrameの作成

In [2]:
# Build the sample table: one inner list per person, values in column order.
df = pd.DataFrame(
    data=[
        ['tokyo', 'male', 21, 165, 63],
        ['osaka', 'male', 28, 170, 71],
        ['fukuoka', 'female', 32, 175, 58],
        ['tokyo', 'male', 21, 165, 63],
        ['osaka', 'female', 28, 175, 70],
        ['fukuoka', 'male', 32, 155, 58],
        ['tokyo', 'female', 21, 165, 63],
        ['osaka', 'male', 28, 172, 67],
        ['fukuoka', 'male', 42, 155, 48],
    ],
    columns=['area', 'gender', 'age', 'height', 'weight'],
)
df

Unnamed: 0,area,gender,age,height,weight
0,tokyo,male,21,165,63
1,osaka,male,28,170,71
2,fukuoka,female,32,175,58
3,tokyo,male,21,165,63
4,osaka,female,28,175,70
5,fukuoka,male,32,155,58
6,tokyo,female,21,165,63
7,osaka,male,28,172,67
8,fukuoka,male,42,155,48


# データの型の確認・型の変換

In [3]:
# Show the dtype of every column in df.
df.dtypes

area      object
gender    object
age        int64
height     int64
weight     int64
dtype: object

In [4]:
# Inspect the dtype of the single 'height' column.
df['height'].dtype

dtype('int64')

In [5]:
# Cast the 'height' column to floating point; this overwrites the column in df.
df['height'] = df['height'].astype('float64')

In [6]:
# Re-check the 'height' dtype after the cast.
df['height'].dtype

dtype('float64')

# DataFrameのカラム名の確認

In [7]:
# Materialise the column labels as a plain Python list.
df_columns = df.columns.tolist()
df_columns

['area', 'gender', 'age', 'height', 'weight']

# 特定の行のデータのみ抽出する

In [8]:
# Keep only the rows whose area is 'fukuoka'; the original row labels are retained.
part_of_df1 = df.loc[df['area'].eq('fukuoka')]

part_of_df1

Unnamed: 0,area,gender,age,height,weight
2,fukuoka,female,32,175.0,58
5,fukuoka,male,32,155.0,58
8,fukuoka,male,42,155.0,48


In [9]:
# Keep the rows whose area is any of the listed values.
part_of_df2 = df.loc[df['area'].isin(['tokyo', 'osaka'])]

part_of_df2

Unnamed: 0,area,gender,age,height,weight
0,tokyo,male,21,165.0,63
1,osaka,male,28,170.0,71
3,tokyo,male,21,165.0,63
4,osaka,female,28,175.0,70
6,tokyo,female,21,165.0,63
7,osaka,male,28,172.0,67


# インデックスの数字を振り直す

In [10]:
# Renumber the rows from 0 and discard the old labels (drop=True).
# The result is rebound rather than using inplace=True, so the filtered
# frame is not mutated in place and the cell stays chainable.
part_of_df1 = part_of_df1.reset_index(drop=True)

part_of_df1

Unnamed: 0,area,gender,age,height,weight
0,fukuoka,female,32,175.0,58
1,fukuoka,male,32,155.0,58
2,fukuoka,male,42,155.0,48


# カラムの並び順の変更

In [11]:
# Reorder the columns by selecting them in the desired order.
sorted_columns_list1 = [
    'height',
    'weight',
    'gender',
    'area',
    'age',
]
sorted_df1 = df.loc[:, sorted_columns_list1]
sorted_df1

Unnamed: 0,height,weight,gender,area,age
0,165.0,63,male,tokyo,21
1,170.0,71,male,osaka,28
2,175.0,58,female,fukuoka,32
3,165.0,63,male,tokyo,21
4,175.0,70,female,osaka,28
5,155.0,58,male,fukuoka,32
6,165.0,63,female,tokyo,21
7,172.0,67,male,osaka,28
8,155.0,48,male,fukuoka,42


In [12]:
# Selecting a shorter list of labels keeps only those columns, in that order.
sorted_columns_list2 = [
    'area',
    'gender',
    'age',
]
sorted_df2 = df.loc[:, sorted_columns_list2]
sorted_df2

Unnamed: 0,area,gender,age
0,tokyo,male,21
1,osaka,male,28
2,fukuoka,female,32
3,tokyo,male,21
4,osaka,female,28
5,fukuoka,male,32
6,tokyo,female,21
7,osaka,male,28
8,fukuoka,male,42


# カラム名の変更

In [13]:
# Mapping from each existing column label to its Japanese replacement.
japanese_labels = {
    'area': 'エリア',
    'gender': '性別',
    'age': '年齢',
    'height': '身長',
    'weight': '体重',
}
# rename returns a new frame; df keeps its original labels.
rename_df = df.rename(columns=japanese_labels)

rename_df

Unnamed: 0,エリア,性別,年齢,身長,体重
0,tokyo,male,21,165.0,63
1,osaka,male,28,170.0,71
2,fukuoka,female,32,175.0,58
3,tokyo,male,21,165.0,63
4,osaka,female,28,175.0,70
5,fukuoka,male,32,155.0,58
6,tokyo,female,21,165.0,63
7,osaka,male,28,172.0,67
8,fukuoka,male,42,155.0,48


# 他のデータフレームをUnionする

In [14]:
# Two extra rows that use the same column labels as df.
other_df = pd.DataFrame(
    [['hokkaido', 'male', 25, 162, 60],
     ['hokkaido', 'female', 38, 179, 81]],
    columns=['area', 'gender', 'age', 'height', 'weight'],
)

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so
# pd.concat is used to stack the frames row-wise. ignore_index=True
# renumbers the combined rows from 0 instead of keeping each frame's labels.
union_df = pd.concat([df, other_df], ignore_index=True)

union_df

Unnamed: 0,area,gender,age,height,weight
0,tokyo,male,21,165.0,63
1,osaka,male,28,170.0,71
2,fukuoka,female,32,175.0,58
3,tokyo,male,21,165.0,63
4,osaka,female,28,175.0,70
5,fukuoka,male,32,155.0,58
6,tokyo,female,21,165.0,63
7,osaka,male,28,172.0,67
8,fukuoka,male,42,155.0,48
9,hokkaido,male,25,162.0,60
10,hokkaido,female,38,179.0,81


# 基本統計量の算出

In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,age,height,weight
count,9.0,9.0,9.0
mean,28.111111,166.333333,62.333333
std,6.845518,7.566373,7.071068
min,21.0,155.0,48.0
25%,21.0,165.0,58.0
50%,28.0,165.0,63.0
75%,32.0,172.0,67.0
max,42.0,175.0,71.0
