In [None]:
import pandas as pd
import mysql.connector as mysql
import seaborn as sns
import matplotlib.pyplot as plt
from credentials import login_config

In [None]:
# Fetch the percentage of 5-star reviews per month into a DataFrame
# (used afterwards to plot the trend line).

# Open a connection to the MySQL database
connection = mysql.connect(**login_config)
try:
    # Period of orders (by delivery date); passed as the query parameters.
    # NOTE(review): the .sql file is assumed to contain two placeholders matching
    # this (start, end) tuple -- confirm against the query text.
    period = ('2016-01-01 00:00:00', '2018-12-31 23:59:59')

    # Load and run the query; per the original notes it counts only orders that
    # were successfully delivered.
    # NOTE(review): this is an absolute, machine-specific path; the notebook will
    # not run elsewhere until it is made relative/configurable.
    with open("/home/hbeltrao/Hugo/Projects/Olist_EDA_Project/2-Queries/5star_review_percentage_per_month.sql") as query:
        five_star_review_per_month = pd.read_sql_query(query.read(), params=period, con=connection)
finally:
    # Always release the database connection, even if opening the file or
    # running the query raises.
    connection.close()


In [None]:
# Point plot of the monthly 5-star review percentage, to show its trend
sns.set_context('paper', font_scale=1.8)

trend_grid = sns.catplot(
    data=five_star_review_per_month,
    x="Months",
    y="percentage",
    kind='point',
    aspect=4,
)

# catplot draws on its own figure; with a single facet its only axes is the
# one the pyplot state machine would otherwise target.
trend_ax = trend_grid.axes.flat[0]
trend_ax.set(
    xlabel='Year-Month',
    ylabel='% of 5 star reviews',
    title="Percentage of 5 star reviews over the months",
)

# Turn the x tick labels vertical
plt.setp(trend_ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Fetch the purchase orders per month and year. Per the original notes the query
# excludes cancelled and unavailable orders.

# Open a connection to the MySQL database
connection = mysql.connect(**login_config)
try:
    # Load and run the query
    # NOTE(review): this is an absolute, machine-specific path; the notebook will
    # not run elsewhere until it is made relative/configurable.
    with open("/home/hbeltrao/Hugo/Projects/Olist_EDA_Project/2-Queries/Purchases_per_month_and_year.sql") as query:
        purchases_per_year_month = pd.read_sql_query(query.read(), con=connection)
finally:
    # Always release the database connection, even if opening the file or
    # running the query raises.
    connection.close()

# Reshape to long form: 'Months' stays as the identifier, every other column
# name goes into 'Year' and its values into 'Purchases'.
purchases_per_year_month_melted = purchases_per_year_month.melt('Months', var_name='Year', value_name='Purchases')

In [None]:
# Point plot comparing purchases per month, one coloured series per year
purchases_grid = sns.catplot(
    data=purchases_per_year_month_melted,
    x="Months",
    y="Purchases",
    hue="Year",
    kind='point',
    aspect=4,
)

# Single-facet grid: label and title its only axes directly
purchases_ax = purchases_grid.axes.flat[0]
purchases_ax.set(
    xlabel="Months",
    ylabel="Purchases",
    title="Purchases realized per month through the years",
)
plt.show()

In [None]:
# Line-plot version of the purchases per month comparison, one line per year.
# The melted frame is long-form, so the column-to-axis mapping must be given
# explicitly; without x/y/hue seaborn treats the frame as wide-form and plots
# its numeric columns against the row index instead of purchases against months.
sns.relplot(
    data=purchases_per_year_month_melted,
    x="Months",
    y="Purchases",
    hue="Year",
    kind='line',
    aspect=4,
)
plt.xlabel("Months")
plt.ylabel("Purchases")
plt.title("Purchases realized per month through the years")
plt.show()

In [None]:
# Fetch the order dispatching efficiency (delayed shipments) per year-month

# Open a connection to the MySQL database
connection = mysql.connect(**login_config)
try:
    # Load and run the query
    # NOTE(review): this is an absolute, machine-specific path; the notebook will
    # not run elsewhere until it is made relative/configurable.
    with open("/home/hbeltrao/Hugo/Projects/Olist_EDA_Project/2-Queries/delayed_shipments_by_year_month.sql") as query:
        order_delay_per_year_month = pd.read_sql_query(query.read(), con=connection)
finally:
    # Always release the database connection, even if opening the file or
    # running the query raises.
    connection.close()

In [None]:
# Plot the percentage of delayed orders per month (green bars, left axis)
# together with the average delay (line, right axis).
sns.set_context('paper', font_scale=2.2)
fig, ax = plt.subplots(figsize=(30, 10))

sns.barplot(data=order_delay_per_year_month, x="year_months", y="Delay_Percentage", color='g', ax=ax)

# Second y-axis sharing the same x-axis, used for the average-delay line.
# NOTE(review): the line only lands on the bar positions if 'year_months' holds
# string labels (bars sit at categorical positions 0..N-1) -- confirm the dtype.
ax2 = ax.twinx()
sns.lineplot(data=order_delay_per_year_month, x="year_months", y="Average_Delay", ax=ax2)

# Decorate AFTER drawing and on the explicit axes: seaborn's categorical
# plotters (releases before 0.13) overwrite axis labels with the column names
# while drawing, which discards labels set beforehand through pyplot.
ax.set_xlabel("Year-Month")
ax.set_ylabel("% of delayed orders")
ax.set_title("Orders Delayed per month")
ax.tick_params(axis='x', labelrotation=90)

plt.show()