In [None]:
# Render our plots inline
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the bike-count CSV: semicolon-separated, Latin-1 encoded, with
# day-first dates in the 'Date' column, which also becomes the index.
df = pd.read_csv(
    './bikes.csv',
    sep=';',
    encoding='latin1',
    parse_dates=['Date'],
    dayfirst=True,
    index_col='Date',
)

In [None]:
# Slice the first three rows as a quick look at the loaded data.
df[:3]

In [None]:
# Select the 'Berri 1' column by its label.
df['Berri 1']

In [None]:
# Plot the 'Berri 1' column against the Date index.
df['Berri 1'].plot()

In [None]:
# Plot the frame's columns on one figure; figsize is (width, height) in inches.
df.plot(figsize=(15, 10))

In [None]:
# Print the built-in documentation for the plot accessor (long output).
help(df.plot)

<hr>

# What is pandas
**pandas** is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.

<h2>Data Structures</h2>
<table border="1" class="colwidths-given docutils">
<colgroup>
<col width="18%">
<col width="24%">
<col width="59%">
</colgroup>
<thead valign="bottom">
<tr class="row-odd"><th class="head">Dimensions</th>
<th class="head">Name</th>
<th class="head">Description</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-even"><td>1</td>
<td>Series</td>
<td>1D labeled homogeneously-typed array</td>
</tr>
<tr class="row-odd"><td>2</td>
<td>DataFrame</td>
<td>General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed columns</td>
</tr>
</tbody>
</table>


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Series作成

In [None]:
# Build a Series from a plain list; np.nan marks a missing entry.
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)
s

## DataFrame作成

In [None]:
# Six consecutive daily timestamps starting on 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)
dates

In [None]:
# 6x4 frame of standard-normal draws, indexed by `dates`, columns 'A'-'D'.
# No random seed is set, so the values differ on every run.
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
df

## DataFrame作成2

In [None]:
# Build a frame from a dict of columns. The scalar entries ('A', 'B', 'F')
# are repeated for every row; each column keeps its own dtype.
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
df2

In [None]:
# Show the dtype of each column.
df2.dtypes

In [None]:
# List the attribute and method names available on the frame (long output).
dir(df2)

## DataFrame操作

In [None]:
# First rows of the frame (head() defaults to 5 rows).
df.head()

In [None]:
# Last rows of the frame (tail() defaults to 5 rows).
df.tail()

In [None]:
# Row labels of the frame.
df.index

In [None]:
# Column labels of the frame.
df.columns

In [None]:
# Underlying data as a NumPy array, without the index and column labels.
df.values

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

In [None]:
# Transpose: rows become columns and columns become rows.
df.T

In [None]:
# Display the frame in its current (unsorted) order.
df

In [None]:
# Sort by the column labels (axis=1) in descending order.
df.sort_index(axis=1, ascending=False)

In [None]:
# Sort by the row labels (axis=0) in descending order.
df.sort_index(axis=0, ascending=False)

In [None]:
# Sort the rows by the values in column 'B' (ascending by default).
df.sort_values(by='B')

## Index処理

In [None]:
# Select column 'A'; the result is a Series.
df['A']

In [None]:
# An integer slice inside [] selects rows by position (here rows 0, 1 and 2).
df[0:3]

In [None]:
# A string slice selects rows by date label; both endpoints are included.
df['20130102':'20130104']

## Selection by Label

In [None]:
# Cross-section by label: the row whose index equals dates[0], as a Series.
df.loc[dates[0]]

In [None]:
# NOTE(review): this slice is positional, not label-based, although it sits
# under "Selection by Label". It returns the first row as a one-row DataFrame.
df[0:1]

## Selection by Position

In [None]:
# Row at integer position 3, returned as a Series.
df.iloc[3]

In [None]:
# Rows at positions 3-4 and columns at positions 0-1 (slice ends are exclusive).
df.iloc[3:5,0:2]

In [None]:
# Lists of integer positions pick arbitrary rows and columns.
df.iloc[[1,2,4],[0,2]]

In [None]:
# Rows at positions 1-2, all columns.
df.iloc[1:3, :]

In [None]:
# All rows, columns at positions 1-2.
df.iloc[:,1:3]

In [None]:
# Single value at row position 1, column position 1.
df.iloc[1,1]

In [None]:
# Same value via iat, the accessor for single-value lookups by position.
df.iat[1,1]

## Boolean Indexing

In [None]:
# Keep only the rows where column 'A' is greater than zero.
df[df.A > 0]

In [None]:
# Element-wise mask: entries that are not > 0 become NaN; the shape is unchanged.
df[df > 0]

### Using the [isin()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.isin.html) method for filtering

In [None]:
# Work on a copy so that df itself is not modified.
df2 = df.copy()
# Add a string column to filter on; one value per row.
df2['E'] = ['one', 'one','two','three','four','three']
df2

In [None]:
# Keep the rows whose 'E' value is one of the listed strings.
df2[df2['E'].isin(['two','four'])]

## Setting

In [None]:
# Six integers labelled with consecutive days starting on 2013-01-02.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start='20130102', periods=6),
)
s1

In [None]:
# Assigning a Series as a column aligns on the index: s1 starts on 2013-01-02,
# so any row of df whose label is missing from s1 gets NaN. This modifies df.
df['F'] = s1
df

In [None]:
# Copy first so df is untouched, then flip the sign of every positive entry
# so that no value in df2 is left above zero.
df2 = df.copy()
df2[df2 > 0] = -df2
df2

## Missing Data
pandas primarily uses the value np.nan to represent missing data. It is by default **not included** in computations. 

In [None]:
# reindex returns a new frame limited to the first four dates, with an extra
# column 'E' that has no data yet (all NaN).
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
# Fill 'E' for the first two dates only (label slices include both endpoints).
df1.loc[dates[0]:dates[1],'E'] = 1
df1

In [None]:
# Drop every row that contains at least one missing value.
df1.dropna(how='any')

In [None]:
# Replace missing values with 5; a new frame is returned and df1 is unchanged.
df1.fillna(value=5)

In [None]:
# Boolean frame that is True wherever a value is missing.
pd.isna(df1)

## Stats

In [None]:
# Mean of each column; missing values are skipped by default.
df.mean()

In [None]:
# Mean of each row: axis=1 averages across the columns.
df.mean(axis=1)

In [None]:
# Median of each column.
df.median()

## String Methods

In [None]:
# A Series of strings with one missing entry, used for the .str examples.
s = pd.Series(
    data=['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'],
)
s

In [None]:
# Lower-case every string through the .str accessor; the missing entry stays missing.
s.str.lower()

## Merge

### concat

In [None]:
# 10x4 frame of standard-normal draws with default integer row/column labels.
df = pd.DataFrame(
    data=np.random.randn(10, 4),
)
df

In [None]:
# Split the frame into three row chunks: rows 0-2, rows 3-6, and row 7 onward.
pieces = [df[:3], df[3:7], df[7:]]
pieces

In [None]:
# Stack the chunks back together row-wise.
pd.concat(pieces)

### join

In [None]:
# Left-hand table for the merge example; both rows share the key 'foo'.
left = pd.DataFrame(
    {
        'key': ['foo', 'foo'],
        'lval': [1, 2],
    }
)
left

In [None]:
# Right-hand table for the merge example; both rows share the key 'foo'.
right = pd.DataFrame(
    {
        'key': ['foo', 'foo'],
        'rval': [4, 5],
    }
)
right

In [None]:
# Join on 'key'. Both tables hold 'foo' twice, so every left row pairs with
# every right row and the result has four rows.
pd.merge(left, right, on='key')

## append

In [None]:
# 8x4 frame of standard-normal draws with columns 'A'-'D'.
df = pd.DataFrame(
    data=np.random.randn(8, 4),
    columns=list('ABCD'),
)
df

In [None]:
# Append a copy of row 3 to the end of the frame.
# DataFrame.append was removed in pandas 2.0, so the row is turned into a
# one-row frame (to_frame().T) and added with pd.concat instead.
# ignore_index=True renumbers the result 0..n-1.
s = df.iloc[3]
pd.concat([df, s.to_frame().T], ignore_index=True)

## Grouping

In [None]:
# Two string key columns ('A', 'B') and two random numeric columns ('C', 'D')
# for the grouping examples.
df = pd.DataFrame(
    {
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    }
)
df

In [None]:
# Sum the numeric columns within each 'A' group.
# The columns are selected explicitly: since pandas 2.0, sum() no longer drops
# non-numeric columns, so the string column 'B' would otherwise be concatenated.
df.groupby('A')[['C', 'D']].sum()

In [None]:
# Group by the ('A', 'B') pair and sum the remaining columns; the result is
# indexed by both keys.
df.groupby(['A','B']).sum()

## Reshaping

### stack

### pivot tables

## Time Series

In [None]:
# 100 timestamps spaced one second apart, each paired with a random integer
# in [0, 500). The lowercase alias 's' is used for seconds because the
# uppercase 'S' spelling is deprecated in recent pandas releases.
rng = pd.date_range('1/1/2012', periods=100, freq='s')
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts

In [None]:
# Downsample the series into 5-minute bins and sum the values in each bin.
ts.resample('5Min').sum()

In [None]:
# Five consecutive daily timestamps (no time zone) with random values.
rng = pd.date_range(start='3/6/2012 00:00', periods=5, freq='D')
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)
ts

In [None]:
# Attach the UTC time zone to the naive timestamps; the clock times are kept.
ts_utc = ts.tz_localize('UTC')
ts_utc

In [None]:
# Express the UTC-aware series in Tokyo local time.
ts_utc.tz_convert('Asia/Tokyo')

In [None]:
# Five month-end timestamps starting from January 2012, with random values.
# The frequency is given as an offset object rather than the string 'M':
# that alias is deprecated for date offsets in recent pandas, while
# pd.offsets.MonthEnd() does not depend on the alias spelling.
rng = pd.date_range('1/1/2012', periods=5, freq=pd.offsets.MonthEnd())
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

In [None]:
# Convert the timestamp index to a period index; with no argument the
# frequency is taken from the index.
ps = ts.to_period()
ps

In [None]:
# Convert the periods back to timestamps (the start of each period by default).
ps.to_timestamp()

## Categoricals

In [None]:
# Small table of ids and raw letter grades for the categorical examples.
df = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5, 6],
        "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e'],
    }
)
df

In [None]:
# Store the raw grades in a new column with the categorical dtype.
df["grade"] = df["raw_grade"].astype("category")
df

In [None]:
# Give the three categories ('a', 'b', 'e', in that order) descriptive names.
# Assigning to .cat.categories is no longer supported in current pandas;
# rename_categories returns a new Series, which is stored back in the column.
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])
df