In [50]:
import pandas as pd
import numpy as np

# Load the daily births CSV (path is relative to the notebook's working directory).
# No date parsing is requested here, so 'Date' comes in as plain strings (object dtype).
df = pd.read_csv("daily-total-female-births.csv")
# Preview the first rows
df.head()

Unnamed: 0,Date,Births
0,1959-01-01,35
1,1959-01-02,32
2,1959-01-03,30
3,1959-01-04,31
4,1959-01-05,44


# Explore dataset

In [51]:
df.describe()

Unnamed: 0,Births
count,365.0
mean,41.980822
std,7.348257
min,23.0
25%,37.0
50%,42.0
75%,46.0
max,73.0


In [52]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    365 non-null    object
 1   Births  365 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 5.8+ KB


In [53]:
df.dtypes

Date      object
Births     int64
dtype: object

In [54]:
len(df)

365

In [55]:
# convert from object dtype into datetime dtype
# (the explicit format matches the YYYY-MM-DD strings shown by df.head())
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df.head()

Unnamed: 0,Date,Births
0,1959-01-01,35
1,1959-01-02,32
2,1959-01-03,30
3,1959-01-04,31
4,1959-01-05,44


In [56]:
df.dtypes

Date      datetime64[ns]
Births             int64
dtype: object

In [57]:
# Group the daily rows by calendar month and sum the births per month.
# m holds the monthly period ("YYYY-MM") of every daily row.
m = df.Date.dt.to_period("M")
df_temp = df.groupby(m)
# Select "Births" explicitly: pandas >= 2.0 no longer drops non-numeric columns
# silently, so summing the whole frame would fail on the datetime 'Date' column.
df1 = df_temp[["Births"]].sum()

In [58]:
df1

Unnamed: 0_level_0,Births
Date,Unnamed: 1_level_1
1959-01,1213
1959-02,1148
1959-03,1218
1959-04,1195
1959-05,1208
1959-06,1212
1959-07,1300
1959-08,1351
1959-09,1446
1959-10,1368


In [59]:
# df_temp.groups.keys()

In [60]:
# Month labels ("YYYY-MM"), one per distinct month, in order of first appearance
list_of_months = [period.strftime("%Y-%m") for period in m.unique()]
list_of_months

['1959-01',
 '1959-02',
 '1959-03',
 '1959-04',
 '1959-05',
 '1959-06',
 '1959-07',
 '1959-08',
 '1959-09',
 '1959-10',
 '1959-11',
 '1959-12']

# Function (non-sliding window approach)

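This is the first, naive approach: for each of the $l$ months it rebuilds the $n$-month window and then loops over it again to sum it, so the cost is $O(2 \cdot l \cdot n) \rightarrow O(l \cdot n)$, which is $O(n^2)$ when the window is as long as the series.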

In [61]:
# input the monthly frame (the window length, in months) interactively
# NOTE(review): input() pauses "Restart & Run All" until a value is typed, so the
# outputs below depend on what was entered - consider a constant for reproducible runs.
n = int(input("Input your monthly frame? "))

# natality function
def natality_func1(n, births=None):
    """Build forward-looking n-month windows of births and their averages (naive approach).

    For every month ``i`` the window holds the totals of months ``i .. i+n-1``.
    Slots that would fall past the last available month are filled with
    ``None`` and are left out of the average.

    Parameters
    ----------
    n : int
        Window length in months; must be at least 1.
    births : sequence of numbers, optional
        Monthly totals in chronological order. Defaults to the ``Births``
        column of the notebook-level ``df1``.

    Returns
    -------
    list of list
        One row per month: the ``n`` window values followed by the mean of
        the values that are not ``None``.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1 (there would be nothing to average).
    """
    if n < 1:
        raise ValueError("n must be at least 1, got {}".format(n))

    if births is None:
        births = df1["Births"]
    # Pull the values out once as plain Python numbers instead of indexing the
    # DataFrame inside the nested loops.
    values = births.tolist() if hasattr(births, "tolist") else list(births)
    month_count = len(values)

    rows = []  # master list -> the output will be used for the final dataframe
    for start in range(month_count):
        window = []
        for offset in range(n):
            position = start + offset
            # past the last month there is no data: pad with None
            window.append(values[position] if position < month_count else None)

        # Deliberately re-summed from scratch for every month: this is the
        # naive baseline the sliding-window version is compared against.
        observed = [value for value in window if value is not None]
        window.append(sum(observed) / len(observed))
        rows.append(window)

    return rows

list1 = natality_func1(n)

Input your monthly frame? 6


In [62]:
list1

[[1213, 1148, 1218, 1195, 1208, 1212, 1199.0],
 [1148, 1218, 1195, 1208, 1212, 1300, 1213.5],
 [1218, 1195, 1208, 1212, 1300, 1351, 1247.3333333333333],
 [1195, 1208, 1212, 1300, 1351, 1446, 1285.3333333333333],
 [1208, 1212, 1300, 1351, 1446, 1368, 1314.1666666666667],
 [1212, 1300, 1351, 1446, 1368, 1350, 1337.8333333333333],
 [1300, 1351, 1446, 1368, 1350, 1314, 1354.8333333333333],
 [1351, 1446, 1368, 1350, 1314, None, 1365.8],
 [1446, 1368, 1350, 1314, None, None, 1369.5],
 [1368, 1350, 1314, None, None, None, 1344.0],
 [1350, 1314, None, None, None, None, 1332.0],
 [1314, None, None, None, None, None, 1314.0]]

In [63]:
# df_months = pd.DataFrame(list_of_months)
# df_months.rename(columns={0:"month"}, inplace=True)
# df_months

In [64]:
# Turn the rows returned by natality_func1(n) into a dataframe
df_res = pd.DataFrame(list1)

# Label the window columns m_0, m_1, ... and call the trailing column "average"
window_labels = ["m_" + str(position) for position in range(len(df_res.columns) - 1)]
df_res.columns = window_labels + ["average"]

# Put the month labels in front as the "Date" column
month_labels = list(m.unique().strftime("%Y-%m"))
df_res.insert(0, "Date", month_labels)
df_res

Unnamed: 0,Date,m_0,m_1,m_2,m_3,m_4,m_5,average
0,1959-01,1213,1148.0,1218.0,1195.0,1208.0,1212.0,1199.0
1,1959-02,1148,1218.0,1195.0,1208.0,1212.0,1300.0,1213.5
2,1959-03,1218,1195.0,1208.0,1212.0,1300.0,1351.0,1247.333333
3,1959-04,1195,1208.0,1212.0,1300.0,1351.0,1446.0,1285.333333
4,1959-05,1208,1212.0,1300.0,1351.0,1446.0,1368.0,1314.166667
5,1959-06,1212,1300.0,1351.0,1446.0,1368.0,1350.0,1337.833333
6,1959-07,1300,1351.0,1446.0,1368.0,1350.0,1314.0,1354.833333
7,1959-08,1351,1446.0,1368.0,1350.0,1314.0,,1365.8
8,1959-09,1446,1368.0,1350.0,1314.0,,,1369.5
9,1959-10,1368,1350.0,1314.0,,,,1344.0


# Function (sliding window approach)

If we use a sliding-window approach, the complexity becomes O(k*n). The idea of a sliding window is to drop <br>
the first (oldest) element of the list, subtracting it from the running sum, and to append the new value to the tail of the list, adding it to the sum.

In [65]:
# input the monthly frame (the window length, in months) interactively
# NOTE(review): input() pauses "Restart & Run All" until a value is typed, so the
# outputs below depend on what was entered - consider a constant for reproducible runs.
n = int(input("Input your monthly frame? "))

# natality function using sliding window approach
def natality_func2(n, births=None):
    """Build forward-looking n-month windows of births and their averages (sliding window).

    Produces the same windows and averages as the naive version, but keeps a
    running sum and a count of real values so each average is an O(1) update:
    the month leaving the window is subtracted and the month entering it is
    added. Slots past the last available month are ``None`` and do not count
    towards the average. A window longer than the series is padded with
    ``None`` from the start instead of failing.

    Parameters
    ----------
    n : int
        Window length in months; must be at least 1.
    births : sequence of numbers, optional
        Monthly totals in chronological order. Defaults to the ``Births``
        column of the notebook-level ``df1``.

    Returns
    -------
    list of list
        One ``n``-element window per month, followed by ONE extra final
        element: the list of per-month averages (callers pop it off).

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1 (there would be nothing to average).
    """
    if n < 1:
        raise ValueError("n must be at least 1, got {}".format(n))

    if births is None:
        births = df1["Births"]
    # Pull the values out once as plain Python numbers instead of indexing the
    # DataFrame on every step.
    values = births.tolist() if hasattr(births, "tolist") else list(births)
    month_count = len(values)

    # Initialise the first window; pad with None when the series is shorter than n.
    first_values = values[:n]
    window = first_values + [None] * (n - len(first_values))
    running_sum = sum(first_values)
    observed = len(first_values)  # number of real (non-None) values in the window

    super_list = []  # master list -> the output will be used for the final dataframe
    list_avg = []  # list to store average values
    for start in range(month_count):
        super_list.append(window.copy())  # copy: the window keeps mutating below
        list_avg.append(running_sum / observed)

        # Slide by one month: the head is always a real value (it is values[start]).
        running_sum -= window.pop(0)
        incoming = start + n
        if incoming < month_count:
            window.append(values[incoming])
            running_sum += values[incoming]
        else:
            # ran past the data: pad, and one fewer real value to average over
            window.append(None)
            observed -= 1

    super_list.append(list_avg)
    return super_list
    
res = natality_func2(n)

Input your monthly frame? 6


In [66]:
# extract the average list: natality_func2 appends the per-month averages as the last element of its result
list_avg = res[-1]
# Drop that trailing element so `res` holds only the window rows.
# NOTE(review): this mutates `res` in place - re-running this cell without re-running
# natality_func2 removes a real window row and leaves list_avg pointing at a window.
res.pop()

[1199.0,
 1213.5,
 1247.3333333333333,
 1285.3333333333333,
 1314.1666666666667,
 1337.8333333333333,
 1354.8333333333333,
 1365.8,
 1369.5,
 1344.0,
 1332.0,
 1314.0]

In [67]:
list_avg

[1199.0,
 1213.5,
 1247.3333333333333,
 1285.3333333333333,
 1314.1666666666667,
 1337.8333333333333,
 1354.8333333333333,
 1365.8,
 1369.5,
 1344.0,
 1332.0,
 1314.0]

In [68]:
res

[[1213, 1148, 1218, 1195, 1208, 1212],
 [1148, 1218, 1195, 1208, 1212, 1300],
 [1218, 1195, 1208, 1212, 1300, 1351],
 [1195, 1208, 1212, 1300, 1351, 1446],
 [1208, 1212, 1300, 1351, 1446, 1368],
 [1212, 1300, 1351, 1446, 1368, 1350],
 [1300, 1351, 1446, 1368, 1350, 1314],
 [1351, 1446, 1368, 1350, 1314, None],
 [1446, 1368, 1350, 1314, None, None],
 [1368, 1350, 1314, None, None, None],
 [1350, 1314, None, None, None, None],
 [1314, None, None, None, None, None]]

In [69]:
# convert list to dataframe
df_sw = pd.DataFrame(res)
df_sw # data frame using sliding windows

Unnamed: 0,0,1,2,3,4,5
0,1213,1148.0,1218.0,1195.0,1208.0,1212.0
1,1148,1218.0,1195.0,1208.0,1212.0,1300.0
2,1218,1195.0,1208.0,1212.0,1300.0,1351.0
3,1195,1208.0,1212.0,1300.0,1351.0,1446.0
4,1208,1212.0,1300.0,1351.0,1446.0,1368.0
5,1212,1300.0,1351.0,1446.0,1368.0,1350.0
6,1300,1351.0,1446.0,1368.0,1350.0,1314.0
7,1351,1446.0,1368.0,1350.0,1314.0,
8,1446,1368.0,1350.0,1314.0,,
9,1368,1350.0,1314.0,,,


In [70]:
# Rebuild the frame from `res` so this cell can be re-run safely
# (inserting "Date" twice into the same frame raises ValueError).
df_sw = pd.DataFrame(res)

# rename column to m_0, m_1, ....
df_sw.columns = ["m_" + str(i) for i in range(df_sw.shape[1])]

# insert date to the first column of the dataframe
df_sw.insert(0, "Date", list(m.unique().strftime("%Y-%m")))

# store the averages as the last column (DataFrame.assign returns a new frame,
# so its result has to be kept for df_sw to actually contain "average")
df_sw["average"] = list_avg
df_sw

Unnamed: 0,Date,m_0,m_1,m_2,m_3,m_4,m_5,average
0,1959-01,1213,1148.0,1218.0,1195.0,1208.0,1212.0,1199.0
1,1959-02,1148,1218.0,1195.0,1208.0,1212.0,1300.0,1213.5
2,1959-03,1218,1195.0,1208.0,1212.0,1300.0,1351.0,1247.333333
3,1959-04,1195,1208.0,1212.0,1300.0,1351.0,1446.0,1285.333333
4,1959-05,1208,1212.0,1300.0,1351.0,1446.0,1368.0,1314.166667
5,1959-06,1212,1300.0,1351.0,1446.0,1368.0,1350.0,1337.833333
6,1959-07,1300,1351.0,1446.0,1368.0,1350.0,1314.0,1354.833333
7,1959-08,1351,1446.0,1368.0,1350.0,1314.0,,1365.8
8,1959-09,1446,1368.0,1350.0,1314.0,,,1369.5
9,1959-10,1368,1350.0,1314.0,,,,1344.0


**The resulting dataframes from the sliding-window and non-sliding-window approaches are the same.**