In [1]:
import numpy as np
import pandas as pd

## 1. NumPy (`np`)

In [2]:
# Positional form: 11 evenly spaced samples from 0 to 10, both endpoints included.
np.linspace(0, 10, 11)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [3]:
# Same range as the positional call, but the sample count is passed by keyword (num=11).
np.linspace(0, 10, num=11)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

## 2. pandas (`pd`)
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html  
https://ithelp.ithome.com.tw/articles/10185922  
https://leemeng.tw/practical-pandas-tutorial-for-aspiring-data-scientists.html

In [2]:
# Column-oriented construction: every dict key becomes a column label and the
# array stored under it supplies that column's values.
data_own = {
    "a": np.array([1, 2, 3]),
    "b": np.array([4, 5, 6]),
    "c": np.array([7, 8, 9]),
}
df_own = pd.DataFrame(data=data_own)
df_own

Unnamed: 0,a,b,c
0,1,4,7
1,2,5,8
2,3,6,9


### A-1. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html

In [4]:
# Plain dict of lists that the following cells feed to pd.DataFrame.
d = dict(col1=[1, 2], col2=[3, 4])
d

{'col1': [1, 2], 'col2': [3, 4]}

In [5]:
# Build the frame from a dict of equal-length lists (one list per column).
d = dict(col1=[1, 2], col2=[3, 4])
df = pd.DataFrame(d)
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [6]:
# Inspect the per-column dtypes; in this run the integer lists were inferred as int64.
df.dtypes

col1    int64
col2    int64
dtype: object

### A-2.

In [7]:
# Rebuild the frame from the same dict, forcing every column to 8-bit integers
# instead of letting pandas infer the dtype.
df = pd.DataFrame(d, dtype="int8")
df.dtypes

col1    int8
col2    int8
dtype: object

In [8]:
# Display the int8 frame: the values shown are the same as before the dtype change.
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


### A-3.

In [9]:
# A 3x3 integer array built from nested lists; each inner list is one row.
np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [10]:
# Row-oriented construction: each row of the 2-D array becomes one DataFrame row,
# and the column labels are supplied separately.
df2 = pd.DataFrame(
    data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    columns=list("abc"),
)
df2

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [11]:
# Same row-oriented construction, with the array kept in its own variable first.
data_own = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
df_own = pd.DataFrame(data_own, columns=list("abc"))
df_own

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


### A-4.

In [12]:
from dataclasses import make_dataclass

# Create a two-field dataclass on the fly; a list of its instances is turned
# into a frame with one row per instance and one column per field.
Point = make_dataclass("Point", (("x", int), ("y", int)))
pd.DataFrame([Point(x, y) for x, y in ((0, 0), (0, 3), (2, 3))])

Unnamed: 0,x,y
0,0,0
1,0,3
2,2,3


In [13]:
# make_dataclass returned a class object; its repr in this run is `types.Point`.
Point

types.Point

### B-1. https://ithelp.ithome.com.tw/articles/10185922

In [14]:
# As of 11 a.m. on 2016-12-14, the groups of the 8th iT 邦幫忙 Ironman contest
# had 59, 9, 19, 14, 6 and 77 participants respectively.
groups = [
    "Modern Web",
    "DevOps",
    "Cloud",
    "Big Data",
    "Security",
    "自我挑戰組",
]
ironmen = [59, 9, 19, 14, 6, 77]

# One column of group names and one column of participant counts.
ironmen_dict = dict(groups=groups, ironmen=ironmen)

ironmen_df = pd.DataFrame(data=ironmen_dict)
ironmen_df

Unnamed: 0,groups,ironmen
0,Modern Web,59
1,DevOps,9
2,Cloud,19
3,Big Data,14
4,Security,6
5,自我挑戰組,77


In [15]:
# Report the frame's number of dimensions, its (rows, columns) shape and the
# dtype of each column, with a "---" separator line between the three parts.
print(
    ironmen_df.ndim,
    "---",
    ironmen_df.shape,
    "---",
    ironmen_df.dtypes,
    sep="\n",
)

2
---
(6, 2)
---
groups     object
ironmen     int64
dtype: object


### B-3.

In [16]:
# NOTE: the `.ix` indexer used by the tutorial was deprecated in pandas 0.20 and
# removed in pandas 1.0, so these calls raise AttributeError on current pandas.
# The error is caught and printed so the notebook still runs from top to bottom;
# use `.loc` (label-based) or `.iloc` (position-based) instead.
try:
    # Select a column
    print(ironmen_df.ix[:, "groups"])
    print("---")  # separator

    # Select an observation (row)
    print(ironmen_df.ix[0])
    print("---")  # separator

    # Select a column and an observation at the same time
    print(ironmen_df.ix[0, "groups"])
except AttributeError as exc:
    # Show the failure in the same "ErrorName: message" form as a traceback's last line.
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'DataFrame' object has no attribute 'ix'

In [17]:
# Label-based selection with .loc; the three results are printed one after
# another with a "---" separator line between them.
print(
    ironmen_df.loc[:, "groups"],   # select a column
    "---",                         # separator
    ironmen_df.loc[0],             # select an observation (row)
    "---",                         # separator
    ironmen_df.loc[0, "groups"],   # select a column and an observation together
    sep="\n",
)

0    Modern Web
1        DevOps
2         Cloud
3      Big Data
4      Security
5         自我挑戰組
Name: groups, dtype: object
---
groups     Modern Web
ironmen            59
Name: 0, dtype: object
---
Modern Web


### B-myself.

In [18]:
# A slice inside [] selects rows by position; 0:1 is end-exclusive, so only the first row is returned.
ironmen_df[0:1]

Unnamed: 0,groups,ironmen
0,Modern Web,59


In [19]:
# The same kind of positional row slice, now covering the first two rows.
ironmen_df[0:2]

Unnamed: 0,groups,ironmen
0,Modern Web,59
1,DevOps,9


In [20]:
# .at reads a single value addressed by row label and column label.
ironmen_df.at[2,'groups']

'Cloud'

In [21]:
# .at again, this time reading from the integer "ironmen" column.
ironmen_df.at[3,'ironmen']

14

In [22]:
# .iloc addresses a cell by integer position: row 3, column 1 (the "ironmen" column).
ironmen_df.iloc[3,1]

14

In [23]:
# Position (3, 0) is the "groups" value of that same row.
ironmen_df.iloc[3,0]

'Big Data'

### <font color=red> C-1. Important: https://leemeng.tw/practical-pandas-tutorial-for-aspiring-data-scientists.html </font>

In [19]:
# Column sources of several kinds: literal lists, a string split into
# characters, and a pandas Series.
dic = {
    "col 1": [1, 2, 3],
    "col 2": [10, 20, 30],
    "col 3": ["x", "y", "z"],
    "col 4": list("abc"),
    "col 5": pd.Series(range(3)),
}
df = pd.DataFrame(data=dic)
df

Unnamed: 0,col 1,col 2,col 3,col 4,col 5
0,1,10,x,a,0
1,2,20,y,b,1
2,3,30,z,c,2


In [20]:
# Map old column labels to new ones. rename() returns a relabelled frame and the
# result is only displayed here, not assigned back to df.
rename_dic = dict([("col 1", "x"), ("col 2", "10x")])
df.rename(columns=rename_dic)

Unnamed: 0,x,10x,col 3,col 4,col 5
0,1,10,x,a,0
1,2,20,y,b,1
2,3,30,z,c,2


In [21]:
# df still shows its original labels: the rename() call above returned a new frame rather than modifying df.
df

Unnamed: 0,col 1,col 2,col 3,col 4,col 5
0,1,10,x,a,0
1,2,20,y,b,1
2,3,30,z,c,2


In [22]:
# Overwrite the first two column labels in place and keep the remaining labels as they are.
df.columns = ['x(new)', '10x(new)', *df.columns[2:]]
df

Unnamed: 0,x(new),10x(new),col 3,col 4,col 5
0,1,10,x,a,0
1,2,20,y,b,1
2,3,30,z,c,2


In [23]:
# Re-display df: the labels assigned through df.columns persist across cells.
df

Unnamed: 0,x(new),10x(new),col 3,col 4,col 5
0,1,10,x,a,0
1,2,20,y,b,1
2,3,30,z,c,2


#### Test myself

In [None]:
# TODO: practice cell. The original content was the unfinished expression `df.`,
# which is a SyntaxError; a valid placeholder is used until the exercise is written.
df.head()

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Recreate the practice frame after the kernel restart: literal lists, a string
# split into characters, and a pandas Series as column sources.
dic = {
    "col 1": [1, 2, 3],
    "col 2": [10, 20, 30],
    "col 3": ["x", "y", "z"],
    "col 4": list("abc"),
    "col 5": pd.Series(range(3)),
}
df = pd.DataFrame(data=dic)
df

Unnamed: 0,col 1,col 2,col 3,col 4,col 5
0,1,10,x,a,0
1,2,20,y,b,1
2,3,30,z,c,2


In [3]:
# Boolean-mask selection: keep only the rows whose "col 1" value is greater than 1.
aa = df.loc[df["col 1"].gt(1)]
aa

Unnamed: 0,col 1,col 2,col 3,col 4,col 5
1,2,20,y,b,1
2,3,30,z,c,2


In [4]:
# Combine two masks with &: "col 1" strictly greater than 1 and strictly less than 3.
aa = df.loc[df["col 1"].gt(1) & df["col 1"].lt(3)]
aa

Unnamed: 0,col 1,col 2,col 3,col 4,col 5
1,2,20,y,b,1


In [5]:
def faa(x):
    """Return the sine of ``x`` as computed by ``np.sin``.

    ``x`` is handed to NumPy unchanged, so whatever ``np.sin`` accepts
    (for example a number or a pandas Series) works here.
    """
    sine = np.sin(x)
    return sine

# Add a derived column by passing the whole "col 1" Series to faa in one call.
df["new"] = df["col 1"].pipe(faa)
df

Unnamed: 0,col 1,col 2,col 3,col 4,col 5,new
0,1,10,x,a,0,0.841471
1,2,20,y,b,1,0.909297
2,3,30,z,c,2,0.14112


In [6]:
def faa2(x):
    """Return ``2 * x``.

    The multiplication is delegated to ``x``'s own type, so a number is
    doubled and a pandas Series is doubled element by element.
    """
    doubled = 2 * x
    return doubled

# Add a second derived column by passing the whole "col 1" Series to faa2.
df["new again"] = df["col 1"].pipe(faa2)
df

Unnamed: 0,col 1,col 2,col 3,col 4,col 5,new,new again
0,1,10,x,a,0,0.841471,2
1,2,20,y,b,1,0.909297,4
2,3,30,z,c,2,0.14112,6


In [7]:
# Positional lookup: row 2, column 1 — the last value of "col 2".
df.iloc[2,1]

30

### Not familiar yet: saving and loading arrays with `np.savez_compressed` / `np.load`

In [62]:
# Save two named arrays into one compressed .npz archive (NumPy appends the
# ".npz" suffix to the given path).
# The "tensor" rows have different lengths, so the array is ragged: NumPy >= 1.24
# raises ValueError for such input unless dtype=object is requested explicitly
# (older versions only emitted a deprecation warning). Object arrays are stored
# via pickle, which is why loading this file later needs allow_pickle.
np.savez_compressed(
    "./jsdlfafghjfgdhjgrflkhjvb",
    tensor=np.array([[1, 2, 3, 4], [0, 1, 2, 3, 4, 5]], dtype=object),
    onevector=np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
)

In [67]:
# allow_pickle is a boolean flag (True rather than the integer 1); it has to be
# enabled here because the stored "tensor" is an object array.
# SECURITY: unpickling can execute arbitrary code — only load files you created yourself.
# NOTE(review): the returned NpzFile keeps the archive open; call aaaaa.close()
# once the arrays are no longer needed.
aaaaa = np.load("./jsdlfafghjfgdhjgrflkhjvb.npz", allow_pickle=True)

In [68]:
# Names of the arrays stored in the archive (the keyword names used when saving).
aaaaa.files

['tensor', 'onevector']

In [69]:
# Index the archive by name; the ragged "tensor" comes back as an object array of lists.
aaaaa["tensor"]

array([list([1, 2, 3, 4]), list([0, 1, 2, 3, 4, 5])], dtype=object)

In [70]:
# Peek at the loaded object's instance attributes (exploration only; the output
# shows the open file handle and the stored allow_pickle setting).
aaaaa.__dict__

{'_files': ['tensor.npy', 'onevector.npy'],
 'files': ['tensor', 'onevector'],
 'allow_pickle': 1,
 'pickle_kwargs': {'encoding': 'ASCII', 'fix_imports': True},
 'zip': <zipfile.ZipFile file=<_io.BufferedReader name='./jsdlfafghjfgdhjgrflkhjvb.npz'> mode='r'>,
 'f': <numpy.lib.npyio.BagObj at 0x7f008c02fc10>,
 'fid': <_io.BufferedReader name='./jsdlfafghjfgdhjgrflkhjvb.npz'>}