In [None]:
# Inference for Software Engineering Bug Reporting

## Dataset: A streaming service, WebFlix, delivers its content through several channels:
## iOS app, Android app, Roku app, Fire TV app and web browsers. Each channel is managed by a different software engineering team. The engineering teams track the number of bugs reported each week and monitor patterns. Management of the Web team identified a worrying upward trend in the number of bugs reported and provided training to the team in May 2020 to address the problem.

In [None]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns 
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
from causalimpact import CausalImpact

In [None]:
# Load the bug-tracking CSV and parse the 'date' column (stored as MM/DD/YYYY text)
# into real datetime values.
df = pd.read_csv('bugs.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'], format='%m/%d/%Y')
)

In [None]:
# Print a concise structural summary of the loaded frame (column names, non-null
# counts and dtypes) -- useful for checking the 'date' dtype and spotting missing values.
df.info()

In [None]:
# Keep only the rows whose channel is 'Web', i.e. the bug reports of the
# Web software engineering team, and preview the first few of them.
df_web = df.loc[df['channel'].eq('Web')]
df_web.head()

In [None]:
# Line chart of the bugs reported for the Web team over time.
# The dashed red vertical line marks 25 May 2020, the date used further down
# as the end of the model's pre-period.
sns.set_theme(style='darkgrid')
plt.figure(figsize=(15, 8))

s = sns.lineplot(x='date', y='bugs', data=df_web, linewidth=1)
s.set_title(
    'Number of Bugs Reported for Web Software Engineering Team by Week in 2020',
    fontsize=15,
    y=1.02,
)
s.set_xlabel('Date', labelpad=15, fontsize=14)
s.set_ylabel('Frequency', labelpad=15, fontsize=14)
# Draw the marker on the axes seaborn just returned (the current axes).
s.axvline(dt.datetime(2020, 5, 25), linestyle='--', linewidth=1, color='r')
plt.show()

In [None]:
# Line chart of the bugs reported over time, one coloured line per channel
# (i.e. per software engineering team).
# The dashed red vertical line marks 25 May 2020, the date used further down
# as the end of the model's pre-period.
sns.set_theme(style='darkgrid')
plt.figure(figsize=(15, 8))

s = sns.lineplot(
    x='date',
    y='bugs',
    hue='channel',
    data=df,
    palette='cool',
    linewidth=1,
)
s.set_title(
    'Number of Bugs Reported for All Software Engineering Teams by Week in 2020',
    fontsize=15,
    y=1.02,
)
s.set_xlabel('Date', labelpad=15, fontsize=14)
s.set_ylabel('Frequency', labelpad=15, fontsize=14)
# Draw the marker on the axes seaborn just returned (the current axes).
s.axvline(dt.datetime(2020, 5, 25), linestyle='--', linewidth=1, color='r')
plt.show()

In [None]:
# Convert the long-format dataframe into wide form with a pivot table:
# one row per date, one column per channel, bug counts summed per (date, channel).
# The aggregation is given as the string 'sum' instead of np.sum: pandas >= 2.1
# deprecates passing NumPy callables here (FutureWarning), and the string form
# keeps the same group-wise sum behaviour.
df_piv = pd.pivot_table(df, values='bugs', index='date', columns='channel', aggfunc='sum')

# Position the Web column as the first column, since it is our Y (response).
# All other columns are our X (covariates).
df_piv = df_piv[['Web', 'Android', 'iOS', 'Roku', 'Fire TV']]
df_piv.head()

In [None]:
# Flatten the pivot table by round-tripping it through a record array, which
# yields a plain DataFrame with 'date' as an ordinary column, then make 'date'
# the index again.
df_flattened = (
    pd.DataFrame(df_piv.to_records())
    .reset_index(drop=True)
    .set_index('date')
)
df_flattened.head()

In [None]:
# pre_period: the dates before the training was delivered to the Web software
# engineering team; post_period: the dates after the training.
# Each period is a [start, end] pair of YYYYMMDD strings.
pre_period = ['20200106', '20200525']
post_period = ['20200601', '20201228']

# Fit the causal-impact model on the wide dataframe (results are shown in the
# following cells).
ci = CausalImpact(
    data=df_flattened,
    pre_period=pre_period,
    post_period=post_period,
    prior_level_sd=None,
)

In [None]:
# Draw only the 'original' panel of the fitted model in a wide 16x4 figure.
# NOTE(review): per the causalimpact docs this should be observed vs. predicted values -- confirm.
ci.plot(panels=['original'], figsize=(16,4))


In [None]:
# Draw only the 'pointwise' panel of the fitted model in a wide 16x4 figure.
# NOTE(review): presumably the per-date difference between observed and predicted values -- confirm.
ci.plot(panels=['pointwise'], figsize=(16,4))


In [None]:
# Draw only the 'cumulative' panel of the fitted model in a wide 16x4 figure.
# NOTE(review): presumably the running total of the pointwise differences -- confirm.
ci.plot(panels=['cumulative'], figsize=(16,4))


In [None]:
# Display the model summary output (default output format).
# print() is used so the returned text is rendered with its line breaks.
print(ci.summary())

In [None]:
# Display the model analysis report, requested via output='report'.
# print() is used so the returned text is rendered with its line breaks.
print(ci.summary(output='report'))