# 3-1 나만의 데이터 만들기

## 시리즈 만들기

In [1]:
import pandas as pd
# Build a Series from a plain list; mixing str and int values gives dtype object.
s = pd.Series(data=['banana', 24])
s

0    banana
1        24
dtype: object

### 인덱스 지정하기

In [3]:
# With no explicit index, the values are labelled by position: 0, 1, ...
s = pd.Series(data=['Galaxy note9', 100])
s

0    Galaxy note9
1             100
dtype: object

In [5]:
# Pass `index` to label each value by name instead of by position.
s = pd.Series(
    data=['Galaxy note9', 100],
    index=['종류', '가격'],
)
s

종류    Galaxy note9
가격             100
dtype: object

## 데이터프레임 만들기

In [7]:
# Build a DataFrame from a mapping of column name -> list of column values.
# No index is given, so rows are labelled 0, 1.
Phones = pd.DataFrame(
    data=dict(
        Name=['Galaxy note9', 'IphoneX'],
        Price=[100, 200],
        Born=['2018-10-20', '2018-11-12'],
        Died=['2024-11-23', '2023-12-23'],
    )
)
Phones

Unnamed: 0,Born,Died,Name,Price
0,2018-10-20,2024-11-23,Galaxy note9,100
1,2018-11-12,2023-12-23,IphoneX,200


### 인덱스 지정하기

In [8]:
# Use the phone names as row labels by passing them as `index`
# rather than storing them in a regular column.
Phones = pd.DataFrame(
    {
        'Price': [100, 200],
        'Born': ['2018-10-20', '2018-11-12'],
        'Died': ['2024-11-23', '2023-12-23'],
    },
    index=['Galaxy note9', 'IphoneX'],
)
Phones

Unnamed: 0,Born,Died,Price
Galaxy note9,2018-10-20,2024-11-23,100
IphoneX,2018-11-12,2023-12-23,200


### 열 순서 지정하기

In [9]:
# `columns` pins the left-to-right column order of the resulting frame.
Phones = pd.DataFrame(
    {
        'Price': [100, 200],
        'Born': ['2018-10-20', '2018-11-12'],
        'Died': ['2024-11-23', '2023-12-23'],
    },
    index=['Galaxy note9', 'IphoneX'],
    columns=['Price', 'Born', 'Died'],
)
Phones

Unnamed: 0,Price,Born,Died
Galaxy note9,100,2018-10-20,2024-11-23
IphoneX,200,2018-11-12,2023-12-23


# 시리즈 다루기 - 기초

In [10]:
# Selecting one row label with .loc returns that row as a Series.
first_row = Phones.loc['Galaxy note9', :]
type(first_row)

pandas.core.series.Series

In [11]:
# Display the row; its Series name is the row label it was selected by.
first_row

Price           100
Born     2018-10-20
Died     2024-11-23
Name: Galaxy note9, dtype: object

### index, values 속성과 keys 메서드 사용하기

In [12]:
# The index of a row Series holds the DataFrame's column labels.
first_row.index

Index(['Price', 'Born', 'Died'], dtype='object')

In [13]:
# .values exposes the underlying data as a NumPy array (object dtype here).
first_row.values

array([100, '2018-10-20', '2024-11-23'], dtype=object)

In [14]:
# keys() returns the same labels as the .index attribute.
first_row.keys()

Index(['Price', 'Born', 'Died'], dtype='object')

### 시리즈의 기초 통계 메서드 사용하기

In [15]:
# Pull a single column out of the frame as a Series (all rows, 'Price' column).
Price = Phones.loc[:, 'Price']
Price

Galaxy note9    100
IphoneX         200
Name: Price, dtype: int64

In [16]:
# Arithmetic mean of the column; returned as a float.
Price.mean()

150.0

In [17]:
# Largest value in the column.
Price.max()

200

In [18]:
# Smallest value in the column.
Price.min()

100

In [19]:
# Sample standard deviation (pandas divides by n - 1 by default).
Price.std()

70.71067811865476

# 시리즈 다루기 - 응용

### 시리즈와 불린 추출 사용하기

In [20]:
# Load the scientists dataset; the relative path is resolved against the
# current working directory.
scientists = pd.read_csv("../data/scientists.csv")

In [38]:
# Extract the 'Age' column as a Series.
ages = scientists['Age']
ages

0    37
1    61
2    90
3    66
4    56
5    45
6    41
7    77
Name: Age, dtype: int64

In [39]:
# Confirm that a single extracted column is a Series.
type(ages)

pandas.core.series.Series

In [23]:
# Mean age, used as the threshold for the boolean extraction that follows.
ages.mean()

59.125

In [25]:
# Boolean extraction: keep only the ages greater than the mean.
ages.loc[ages > ages.mean()]

1    61
2    90
3    66
7    77
Name: Age, dtype: int64

#### 아니, 어떻게 이런 일이...?

In [26]:
# The comparison alone yields a boolean Series: True where age > mean.
ages.gt(ages.mean())

0    False
1     True
2     True
3     True
4    False
5    False
6    False
7     True
Name: Age, dtype: bool

In [29]:
# A hand-written list of booleans (one per row) selects the rows marked True,
# which is exactly what the comparison result does.
ages[[False, True, True, True, False, False, False, True]]

1    61
2    90
3    66
7    77
Name: Age, dtype: int64

### 시리즈와 브로드캐스팅

In [30]:
# Adding two Series of the same length adds them element by element.
ages.add(ages)

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64

In [31]:
# A scalar is broadcast: 100 is added to every element.
ages.add(100)

0    137
1    161
2    190
3    166
4    156
5    145
6    141
7    177
Name: Age, dtype: int64

In [32]:
# Series are aligned on index labels before adding: labels missing from the
# shorter Series produce NaN, and the result becomes float64.
ages.add(pd.Series([1, 100]))

0     38.0
1    161.0
2      NaN
3      NaN
4      NaN
5      NaN
6      NaN
7      NaN
dtype: float64

In [33]:
# Reorder the Series by index label, highest label first.
ages.sort_index(ascending=False)

7    77
6    41
5    45
4    56
3    66
2    90
1    61
0    37
Name: Age, dtype: int64

# 데이터프레임 다루기

In [35]:
# Preview the first five rows of the frame.
scientists.head(n=5)

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist


In [34]:
# Boolean extraction on a DataFrame: keep the rows whose Age exceeds the mean.
scientists.loc[scientists['Age'].gt(scientists['Age'].mean())]

Unnamed: 0,Name,Born,Died,Age,Occupation
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [40]:
# The scalar is broadcast to every cell: numeric values are doubled and
# string values are repeated twice.
scientists * 2

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline FranklinRosaline Franklin,1920-07-251920-07-25,1958-04-161958-04-16,74,ChemistChemist
1,William GossetWilliam Gosset,1876-06-131876-06-13,1937-10-161937-10-16,122,StatisticianStatistician
2,Florence NightingaleFlorence Nightingale,1820-05-121820-05-12,1910-08-131910-08-13,180,NurseNurse
3,Marie CurieMarie Curie,1867-11-071867-11-07,1934-07-041934-07-04,132,ChemistChemist
4,Rachel CarsonRachel Carson,1907-05-271907-05-27,1964-04-141964-04-14,112,BiologistBiologist
5,John SnowJohn Snow,1813-03-151813-03-15,1858-06-161858-06-16,90,PhysicianPhysician
6,Alan TuringAlan Turing,1912-06-231912-06-23,1954-06-071954-06-07,82,Computer ScientistComputer Scientist
7,Johann GaussJohann Gauss,1777-04-301777-04-30,1855-02-231855-02-23,154,MathematicianMathematician


# 시리즈와 데이터프레임의 데이터 처리하기

## 열 삭제하기

In [44]:
# Drop the 'Age' column. The result is not assigned, so this cell only
# displays the frame without that column.
scientists.drop(columns=['Age'])

Unnamed: 0,Name,Born,Died,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,Chemist
1,William Gosset,1876-06-13,1937-10-16,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,Nurse
3,Marie Curie,1867-11-07,1934-07-04,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,Biologist
5,John Snow,1813-03-15,1858-06-16,Physician
6,Alan Turing,1912-06-23,1954-06-07,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,Mathematician
