### Introduction to Pandas, Series Data and DataFrame

In [1]:
import pandas as pd

In [2]:
# A Series can be built directly from a Python list. With no index given,
# the entries are labelled with integer positions starting at 0.
student = ["Alice", "Jack", "Molly"]
pd.Series(student)

0    Alice
1     Jack
2    Molly
dtype: object

In [3]:
# A list of whole numbers produces an integer Series (int64 in the output below).
num = [1,2,3,4,5]
pd.Series(num)

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [5]:
# With string data, a missing entry given as None is kept as None and the
# dtype remains object.
student = ["Alice", "Jack", None]
pd.Series(student)

0    Alice
1     Jack
2     None
dtype: object

In [6]:
num = [1,2,3,4,None]  # the None becomes NaN, which is a float, so the dtype is float64
pd.Series(num) 

0    1.0
1    2.0
2    3.0
3    4.0
4    NaN
dtype: float64

In [9]:
# Keep in mind that NaN is similar in meaning to None, but it is a numeric value
# and is treated differently for efficiency reasons.

import numpy as np
print(np.nan == None)  # NaN does not compare equal to None
print(np.nan ==np.nan)  # NaN does not even compare equal to itself
print(np.isnan(np.nan))  # so test for NaN with np.isnan instead of ==

False
False
True


In [10]:
# Building a Series from a dict: the keys become the index labels and the
# values become the data.
students_scores = {'Alice': 'Physics',
                   'Jack': 'Chemistry',
                   'Molly': 'English'}
s = pd.Series(students_scores)
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [11]:
# The index labels are available through the index attribute.
s.index

Index(['Alice', 'Jack', 'Molly'], dtype='object')

In [12]:
# The object dtype is not just for strings; it can hold arbitrary Python objects,
# such as the tuples used here.
students = [("Alice","Brown"), ("Jack", "White"), ("Molly", "Green")]
pd.Series(students)

0    (Alice, Brown)
1     (Jack, White)
2    (Molly, Green)
dtype: object

In [13]:
# The index can also be supplied separately from the data via the index parameter.
s = pd.Series(['Physics', 'Chemistry', 'English'], index=['Alice', 'Jack', 'Molly'])
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [14]:
# When both a dict and an explicit index are given, the index decides what is kept:
# keys that are not listed (Jack) are left out, and labels with no matching key (Sam)
# get NaN, as the output below shows.
students_scores = {'Alice': 'Physics',
                   'Jack': 'Chemistry',
                   'Molly': 'English'}

s = pd.Series(students_scores, index=['Alice', 'Molly', 'Sam'])
s

Alice    Physics
Molly    English
Sam          NaN
dtype: object

### Querying a Series

In [15]:
# To query by numeric location, starting at zero, use the iloc attribute. To query
# by the index label, use the loc attribute.

students_classes = {'Alice': 'Physics',
                   'Jack': 'Chemistry',
                   'Molly': 'English',
                   'Sam': 'History'}
s = pd.Series(students_classes)
s

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [16]:
# Position 1 is the second entry, because positions start at zero.
s.iloc[1]

'Chemistry'

In [17]:
# Look up the entry whose index label is "Jack".
s.loc["Jack"]

'Chemistry'

In [18]:
# Keep in mind that iloc and loc are not methods, they are attributes. So you don't use
# parentheses to query them, but square brackets instead, which is called the indexing operator.
# Pandas also lets us use the indexing operator directly on the Series itself for label
# lookups. Passing an integer that way to a Series with a non-integer index used to fall
# back to a positional lookup, but that fallback is deprecated (pandas 2.1+), so positional
# access goes through iloc explicitly.
print(s.iloc[1])
print(s["Jack"])

Chemistry
Chemistry


In [19]:
# Average the grades the slow way: walk the Series in a plain Python loop,
# accumulate a running total, then divide by the number of entries.
grades = pd.Series([90, 80, 70, 60])
total = 0

for grade in grades:
    total = total + grade

print(total / len(grades))

75.0


In [20]:
import numpy as np

# np.sum accepts an iterable; here we pass it the pandas Series to add everything
# up in a single call instead of looping.

total = np.sum(grades)
print(total/len(grades))

75.0


In [21]:
# 10,000 random integers drawn from 0 to 999, used for the timing comparisons.
num = pd.Series(np.random.randint(0,1000,10000))

In [24]:
%%timeit -n 100
# Baseline: add the values up one at a time in a Python loop, then take the mean.
total = 0
for number in num:
    total+=number

total/len(num)

1.23 ms ± 53.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [25]:
%%timeit -n 100
# Same mean, with the summation done by a single np.sum call.
total = np.sum(num)
total/len(num)

68.3 µs ± 15.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [26]:
# Arithmetic with a scalar is applied to every element of the Series at once.
num+=2
num.head()

0    857
1    558
2    578
3    792
4    322
dtype: int32

In [34]:
%%timeit -n 10
# Slow, element-by-element version: walk the (label, value) pairs and write each
# updated value back through loc. Series.iteritems was removed in pandas 2.0;
# Series.items is the equivalent iterator.
s = pd.Series(np.random.randint(0,1000,1000))

for label, value in s.items():
    s.loc[label]= value+2

44.1 ms ± 4.31 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [35]:
%%timeit -n 10
# Same update applied to the whole Series in one step.
s = pd.Series(np.random.randint(0,1000,1000))
s+=2

302 µs ± 61 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [36]:
# Assigning to a label that does not exist yet adds a new entry; the index can then
# mix types (integers and a string here).
a= pd.Series([1,2,3])
a["apple"] = 5
a

0        1
1        2
2        3
apple    5
dtype: int64

In [37]:
# One class per student, with the student names as index labels.
students_classes = pd.Series({'Alice': 'Physics',
                   'Jack': 'Chemistry',
                   'Molly': 'English',
                   'Sam': 'History'})
students_classes

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [38]:
# Index labels do not have to be unique: all three entries share the label "Kelly".
kelly_classes = pd.Series(['Philosophy', 'Arts', 'Math'], index=['Kelly', 'Kelly', 'Kelly'])
kelly_classes

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [40]:
# Series.append was removed in pandas 2.0; pd.concat is the replacement. It returns a
# new Series containing the entries of both inputs in order and leaves
# students_classes and kelly_classes unchanged.
all_students_classes = pd.concat([students_classes, kelly_classes])
all_students_classes

Alice       Physics
Jack      Chemistry
Molly       English
Sam         History
Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object