## Setup

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to subsequent matplotlib figures
sns.set_theme()

# Project-local helper module (its contents are not visible from this notebook)
import utils_02 as utils

# Automatically reload edited modules before each cell is executed
%load_ext autoreload
%autoreload 2

#### Debugging: scratch checks of `agg`, `sum`, and `groupby`

In [2]:
# Small all-integer frame used to compare the aggregation calls below
data = dict(
    A=[1, 2, 3, 4],
    B=[10, 20, 30, 40],
    C=[100, 200, 300, 400],
)
df = pd.DataFrame(data=data)

In [3]:
# Display the sample frame
df

Unnamed: 0,A,B,C
0,1,10,100
1,2,20,200
2,3,30,300
3,4,40,400


In [5]:
# agg with a single reduction name: one value per column
df.agg('sum') 

A      10
B     100
C    1000
dtype: int64

In [6]:
# Calling the reduction method directly gives the same per-column sums as agg('sum')
df.sum()

A      10
B     100
C    1000
dtype: int64

In [7]:
# Per-column mean via agg; the result is a float Series
df.agg('mean')

A      2.5
B     25.0
C    250.0
dtype: float64

In [8]:
# A list of reductions returns a DataFrame with one row per reduction.
# Note the 'sum' row is shown as float because it shares columns with 'mean'.
df.agg(['sum', 'mean'])

Unnamed: 0,A,B,C
sum,10.0,100.0,1000.0
mean,2.5,25.0,250.0


In [9]:
# Calling the reductions separately yields a tuple of two Series,
# each keeping its own dtype (int64 sums, float64 means)
df.sum(), df.mean()

(A      10
 B     100
 C    1000
 dtype: int64,
 A      2.5
 B     25.0
 C    250.0
 dtype: float64)

In [11]:
# A dict picks one reduction per column; the result is a single Series,
# so the integer sums come back as floats alongside B's mean
df.agg({'A': 'sum', 'B': 'mean', 'C': 'sum'})

A      10.0
B      25.0
C    1000.0
dtype: float64

In [12]:
# Add a 'Total' row holding the sum of every column.
# Any existing 'Total' row is left out of the sum, so re-running this cell
# does not fold the previous total into the new one (10 -> 20 -> 40 ...).
# errors='ignore' makes the drop a no-op on the first run, when no such row exists.
df.loc['Total'] = df.drop(index='Total', errors='ignore').sum()
df

Unnamed: 0,A,B,C
0,1,10,100
1,2,20,200
2,3,30,300
3,4,40,400
Total,10,100,1000


In [13]:
# Sales records by region, used for the groupby examples
data = dict(
    Region=['East', 'West', 'East', 'West', 'East'],
    Sales=[100, 200, 150, 250, 120],
    Units=[5, 10, 8, 12, 6],
)
df = pd.DataFrame(data=data)

In [14]:
# Display the sales frame
df

Unnamed: 0,Region,Sales,Units
0,East,100,5
1,West,200,10
2,East,150,8
3,West,250,12
4,East,120,6


In [15]:
# Group the rows by their 'Region' label; reused by the aggregation cells below
regions = df.groupby(by='Region')

In [16]:
# Total Sales within each region
regions['Sales'].sum()

Region
East    370
West    450
Name: Sales, dtype: int64

In [17]:
# Number of non-null Units entries within each region
regions['Units'].count()

Region
East    3
West    2
Name: Units, dtype: int64

In [19]:
# Both per-region reductions in one call: summed Sales and counted Units
regions.agg({'Sales': 'sum', 'Units': 'count'})  

Unnamed: 0_level_0,Sales,Units
Region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,370,3
West,450,2


## 01 Creating a Series from Python objects 

In [21]:
# Food name -> calorie count
calorie_info = dict(
    [
        ("Cereal", 125),
        ("Chocolate Bar", 406),
        ("Ice Cream Sundae", 342),
    ]
)

In [22]:
# Display the plain dict before it is converted to a Series
calorie_info

{'Cereal': 125, 'Chocolate Bar': 406, 'Ice Cream Sundae': 342}

In [26]:
# Build a named Series from the dict: keys become the index labels, values the data
calories = pd.Series(data=calorie_info, name="Calories")

In [27]:
# Display the resulting Series
calories

Cereal              125
Chocolate Bar       406
Ice Cream Sundae    342
Name: Calories, dtype: int64

## 02 Series attributes

In [29]:
# The index labels (taken from the dict keys) and the type of the index object
calories.index, type(calories.index)

(Index(['Cereal', 'Chocolate Bar', 'Ice Cream Sundae'], dtype='object'),
 pandas.core.indexes.base.Index)

In [30]:
# The underlying data, exposed as a NumPy array
calories.values, type(calories.values)

(array([125, 406, 342]), numpy.ndarray)

In [31]:
# True when no index label is repeated
calories.index.is_unique

True

In [36]:
# Checks the values, not the index: 125, 406, 342 drops at the end, hence False
calories.is_monotonic_increasing

False

## 03 Passing the Series to Python’s built-in functions

In [37]:
# Three city names followed by a missing value (NaN); default integer index
cities = pd.Series(["San Francisco", "Los Angeles", "Las Vegas", np.nan])

In [38]:
# Display the Series, including the missing entry
cities

0    San Francisco
1      Los Angeles
2        Las Vegas
3              NaN
dtype: object

In [39]:
# list() iterates over the values only; the index is discarded and the missing entry stays as nan
list(cities)

['San Francisco', 'Los Angeles', 'Las Vegas', nan]

In [40]:
# dict() pairs each index label with its value
dict(cities)

{0: 'San Francisco', 1: 'Los Angeles', 2: 'Las Vegas', 3: nan}