## Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Five integers (10, 20, 30, 40, 50); pandas assigns a default 0..4 index.
x = pd.Series(range(10, 60, 10))
x

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [3]:
# Row labels of the Series; no index was supplied, so pandas generated a RangeIndex.
x.index

RangeIndex(start=0, stop=5, step=1)

In [4]:
# A Series has one dtype shared by all of its values (int64 here).
x.dtype

dtype('int64')

In [5]:
# Underlying data as a NumPy array. `to_numpy()` is the accessor the pandas
# documentation recommends over the older `.values` attribute.
x.to_numpy()

array([10, 20, 30, 40, 50], dtype=int64)

In [6]:
# A Series is a one-dimensional labeled array: it has an index and a single data type.

In [10]:
# Yearly sales totals labelled by sales person. The Series name is passed to
# the constructor and the index name is attached with rename_axis.
data = [450, 650, 870]
index = ['Don', 'Mike', 'Edwin']
sales = pd.Series(data, index=index, name="Total sales per year").rename_axis("Name")
sales

Name
Don      450
Mike     650
Edwin    870
Name: Total sales per year, dtype: int64

In [8]:
# The custom string labels supplied when the Series was built.
sales.index

Index(['Don', 'Mike', 'Edwin'], dtype='object', name='Name')

In [11]:
# Label-based lookup. `sales[0]` relied on the deprecated fallback that treats an
# integer key as a position when the index holds strings; use `.loc` for labels
# and `.iloc` for positions instead.
sales.loc['Don']

450

In [13]:
# Position-based lookup: the first element, whatever its label is.
sales.iloc[0]

450

In [14]:
# Element-wise comparison: yields a boolean Series on the same index.
sales > 500

Name
Don      False
Mike      True
Edwin     True
Name: Total sales per year, dtype: bool

In [24]:
# A boolean list acts as a mask: only the positions marked True are kept.
sales[[False, True, True]]

Name
Mike     650
Edwin    870
Name: Total sales per year, dtype: int64

In [25]:
# Keep only the entries whose value exceeds 500, using the comparison as a mask.
sales.loc[sales > 500]

Name
Mike     650
Edwin    870
Name: Total sales per year, dtype: int64

In [26]:
# Filtered values as a bare NumPy array (labels dropped). `to_numpy()` is the
# accessor the pandas documentation recommends over `.values`.
sales[sales > 500].to_numpy()

array([650, 870], dtype=int64)

In [27]:
# `in` on a Series tests membership among the index labels, not the values.
600 in sales

False

In [28]:
# Same check with another key: 2011 is not one of the index labels either.
2011 in sales

False

In [29]:
# To test membership among the values, search the underlying array.
# `to_numpy()` is the accessor the pandas documentation recommends over `.values`.
650 in sales.to_numpy()

True

In [30]:
# Convert to a plain dict mapping index label -> value.
sales.to_dict()

{'Don': 450, 'Mike': 650, 'Edwin': 870}

In [31]:
# A dict converts directly to a Series: keys become the index, values the data.
sales_dict = dict(Don=534, Mike=453, Edwin=412)
sales_ser = pd.Series(data=sales_dict)
sales_ser

Don      534
Mike     453
Edwin    412
dtype: int64

In [42]:
# Passing an explicit index alongside a dict selects and orders entries by label;
# labels with no matching key ('Sally', 'Lucy') are filled with NaN.
new_sales = pd.Series(
    data=sales_dict,
    index=['Don', 'Mike', 'Sally', 'Edwin', 'Lucy'],
)
new_sales

Don      534.0
Mike     453.0
Sally      NaN
Edwin    412.0
Lucy       NaN
dtype: float64

In [33]:
# NumPy's isnan applied to the Series returns a boolean Series; True marks the NaN entries.
np.isnan(new_sales)

Don      False
Mike     False
Sally     True
Edwin    False
Lucy      True
dtype: bool

In [38]:
# Flag the missing entries using the Series' own missing-value check.
new_sales.isna()

Don      False
Mike     False
Sally     True
Edwin    False
Lucy      True
dtype: bool

In [34]:
# The missing entry is stored as NaN rather than None, so this identity test is False.
new_sales.loc['Sally'] is None

False

In [36]:
# np.nan is NumPy's "not a number" constant; it displays as `nan`.
n = np.nan
n

nan

In [37]:
# NaN is an ordinary Python float.
type(n)

float

In [43]:
# Current state of the Series before filling in a missing value.
new_sales

Don      534.0
Mike     453.0
Sally      NaN
Edwin    412.0
Lucy       NaN
dtype: float64

In [44]:
# Assign through .loc to fill in Sally's missing figure (modifies new_sales in place).
new_sales.loc['Sally'] = 548
new_sales

Don      534.0
Mike     453.0
Sally    548.0
Edwin    412.0
Lucy       NaN
dtype: float64

In [45]:
# Label the index so the output states what the row labels represent.
new_sales.index.name = "Sales Person"

In [46]:
# Name the Series itself; the name is shown in the footer of its repr.
new_sales.name = "Total TV Sales"

In [47]:
# Both names now appear in the output: the index name on top, the Series name in the footer.
new_sales

Sales Person
Don      534.0
Mike     453.0
Sally    548.0
Edwin    412.0
Lucy       NaN
Name: Total TV Sales, dtype: float64

## Data Frames
- Two Dimensional
- Size mutable
- Heterogeneous
- Rows and columns (records and series)

In [49]:
# Promote the named Series to a one-column DataFrame: the Series name becomes
# the column label and the index is carried over unchanged.
sales_df = new_sales.to_frame()
sales_df

Unnamed: 0_level_0,Total TV Sales
Sales Person,Unnamed: 1_level_1
Don,534.0
Mike,453.0
Sally,548.0
Edwin,412.0
Lucy,


In [50]:
# Each inner list is one row; `columns` names the fields in positional order.
data = [
    ['Adrian', 20],
    ['Bethany', 23],
    ['Chloe', 41],
]
df = pd.DataFrame(data=data, columns=["Name", "Age"])
df

Unnamed: 0,Name,Age
0,Adrian,20
1,Bethany,23
2,Chloe,41


In [51]:
# A dict of equal-length lists maps column name -> column values.
new_dict = dict(
    Name=['Tom', 'Jane', 'Steve', 'Lucy'],
    Sales=[250, 300, 350, 420],
)

df_dict = pd.DataFrame(data=new_dict)
df_dict

Unnamed: 0,Name,Sales
0,Tom,250
1,Jane,300
2,Steve,350
3,Lucy,420


In [58]:
# One dict per row. Rows may omit keys or add extra ones: the frame's columns
# are the union of all keys, and absent entries show up as missing values.
list_dicts = [
    dict(Name='Tom', Sales=300),
    dict(Name='Greg'),
    dict(Name='Simone', Sales=745),
    dict(Name='Paula', Sales=542, Performance='Outstanding'),
    dict(Name='Yuri', Sales=453),
]

df_list_dicts = pd.DataFrame(data=list_dicts)
df_list_dicts

Unnamed: 0,Name,Sales,Performance
0,Tom,300.0,
1,Greg,,
2,Simone,745.0,
3,Paula,542.0,Outstanding
4,Yuri,453.0,


In [59]:
# Selecting a single column returns it as a Series.
df_list_dicts['Sales']

0    300.0
1      NaN
2    745.0
3    542.0
4    453.0
Name: Sales, dtype: float64

In [60]:
# The string column comes back with the generic object dtype.
df_list_dicts['Name']

0       Tom
1      Greg
2    Simone
3     Paula
4      Yuri
Name: Name, dtype: object

In [61]:
# .loc with a row label returns that row as a Series indexed by column name.
df_list_dicts.loc[3]

Name                 Paula
Sales                542.0
Performance    Outstanding
Name: 3, dtype: object

In [62]:
# Show the full frame again before adding a column.
df_list_dicts

Unnamed: 0,Name,Sales,Performance
0,Tom,300.0,
1,Greg,,
2,Simone,745.0,
3,Paula,542.0,Outstanding
4,Yuri,453.0,


In [63]:
# Assigning to a new column name adds that column to the frame in place;
# the list supplies one value per row.
df_list_dicts['Grade'] = ['A', 'A', 'B', 'A', 'C']
df_list_dicts

Unnamed: 0,Name,Sales,Performance,Grade
0,Tom,300.0,,A
1,Greg,,,A
2,Simone,745.0,,B
3,Paula,542.0,Outstanding,A
4,Yuri,453.0,,C


In [64]:
# Returns a new frame without the 'Performance' column; the result is not
# assigned, so df_list_dicts itself is left untouched.
df_list_dicts.drop(columns='Performance')

Unnamed: 0,Name,Sales,Grade
0,Tom,300.0,A
1,Greg,,A
2,Simone,745.0,B
3,Paula,542.0,A
4,Yuri,453.0,C


In [67]:
# Returns a new frame without the row labelled 1; the result is not assigned,
# so df_list_dicts itself is left untouched.
df_list_dicts.drop(index=1)

Unnamed: 0,Name,Sales,Performance,Grade
0,Tom,300.0,,A
2,Simone,745.0,,B
3,Paula,542.0,Outstanding,A
4,Yuri,453.0,,C


In [68]:
# Neither drop() result was assigned back, so the original frame is unchanged.
df_list_dicts

Unnamed: 0,Name,Sales,Performance,Grade
0,Tom,300.0,,A
1,Greg,,,A
2,Simone,745.0,,B
3,Paula,542.0,Outstanding,A
4,Yuri,453.0,,C


In [69]:
# A list of column names selects a sub-frame with just those columns, in the order given.
df_list_dicts[['Name', 'Sales', 'Grade']]

Unnamed: 0,Name,Sales,Grade
0,Tom,300.0,A
1,Greg,,A
2,Simone,745.0,B
3,Paula,542.0,A
4,Yuri,453.0,C
