## A practice exercise in data exploration and simple analysis using secondary data.
> * The first step is to import the **pandas** library under its conventional alias `pd`.

In [None]:
import pandas as pd

* the code below reads the file containing the omoku data into a dataframe.

In [None]:
# Load the Omoku CSV into a DataFrame; no `names=` override is passed, so the
# column labels come from the file's own header row.
omoku = pd.read_csv('omoku_data.csv')
omoku

In [None]:
omoku.describe()

* For simple computations such as the mean (average), max, and min, NumPy will be used, so it is imported as `np`.

In [None]:
import numpy as np

In [None]:
power_on = omoku['Power_time']
power_on

* calculating the mean, max, and min of power time in Omoku for the period under review

In [None]:
np.mean(power_on)

In [None]:
np.max(power_on)

In [None]:
np.min(power_on)

* handling missing data.

In [None]:
omoku[omoku['Power_time'].isna()]

In [None]:
omoku[omoku['Power_time']==0]

In [None]:
omoku[omoku['Remark'].notna()]

In [None]:
# Every NaN in every column is replaced with 0, on the stated assumption that the
# missing values have no effect on the outcome of the analysis.
# NOTE(review): a zero-filled gap is counted as a real 0 reading by mean/min, so
# statistics taken from this frame would be pulled down -- confirm before using it.
new_data = omoku.fillna(0)
new_data

* An updated version of the Omoku power supply data

In [None]:
# Read the same CSV again into a separate frame for the date-indexed analysis.
# NOTE(review): parse_dates=True asks pandas to parse the index only; no index_col
# is given here, so the 'Date' column is presumably left unparsed at this point
# and is converted later by format_df -- confirm.
omoku_updated = pd.read_csv('omoku_data.csv', parse_dates=True)
omoku_updated

In [None]:
def format_df(df):
    """Return a new frame indexed by the 'Date' column cast to datetimes.

    The input frame is not modified: the conversion is applied to a copy, so
    the caller's 'Date' column keeps its original values and dtype.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Date' column whose values can be cast to
        ``datetime64[ns]``.

    Returns
    -------
    pandas.DataFrame
        A new frame whose index is the converted 'Date' column; the remaining
        columns keep their original order.

    Raises
    ------
    KeyError
        If ``df`` has no 'Date' column.
    """
    # assign() builds a new frame instead of writing the converted column back
    # into the caller's object.
    converted = df.assign(Date=df['Date'].astype('datetime64[ns]'))
    return converted.set_index('Date')

In [None]:
omoku_updated = format_df(omoku_updated)
omoku_updated

* dropping the rows with NaN values
  > First, the NaN values are dropped from the `Power_time` column.

In [None]:
omoku_on = omoku_updated['Power_time'].dropna()
omoku_on

* here, the outage column is cleaned and the NaN values dropped.

In [None]:
omoku_off = omoku_updated['Outages'].dropna()
omoku_off

* we now have two sets of new data for the _power_time_ and _outages_, so we will merge them on
the 'Date' column to get a new set of data called **omoku_power**

In [None]:
# Inner join of the two cleaned series on their shared 'Date' label, so only
# dates present in both series make it into the combined frame.
omoku_power = pd.merge(left=omoku_on, right=omoku_off, how='inner', on='Date')
omoku_power

* To make sure there are no NaN values in the new data set, we use the `isna()` method below to check.

In [None]:
omoku_power.isna().sum()

* For easy presentation and interpretation, the data will be resampled (aggregated) into weekly and monthly supply.
> first, let's resample the data into monthly supply.

In [None]:
# Aggregate to one row per calendar month (labelled by month-end).
# asfreq('ME') would only pick the rows that happen to be dated on a month-end
# (NaN where that date is absent); resample() bins every row into its month so
# the hours are actually summed.
# min_count=1 leaves a month with no rows as NaN instead of reporting 0 hours.
# NOTE(review): sum is used as the monthly "supply"; switch to .mean() if an
# average per observation is wanted instead.
omoku_monthly_supply = omoku_power.resample('ME').sum(min_count=1)
omoku_monthly_supply

* weekly aggregation/ resampling

In [None]:
# Aggregate to one row per week (pandas 'W' bins, labelled by the week's end).
# asfreq('W') would only pick the rows dated exactly on those week-end labels;
# resample() bins every row into its week so the hours are actually summed.
# min_count=1 leaves a week with no rows as NaN instead of reporting 0 hours.
# NOTE(review): sum is used as the weekly "supply"; switch to .mean() if an
# average per observation is wanted instead.
omoku_weekly = omoku_power.resample('W').sum(min_count=1)
omoku_weekly

* using bokeh to make some plots using the omoku_power data

In [None]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource
import pandas as pd
from datetime import datetime, timedelta
from bokeh.layouts import row
output_notebook()

In [None]:
omoku_monthly_supply

In [None]:
omoku_monthly_supply.index

### making a time series plot with the monthly power supply.

In [None]:
# Monthly time series: one line per column of the monthly frame.
data = omoku_monthly_supply

p = figure(
    title='Power supply in Omoku',
    x_axis_type='datetime',
    x_axis_label='MONTHS IN REVIEW',
    y_axis_label='POWER_DURATION(hours)',
    width=800,
    height=400,
)

# (column, colour) pairs; the column name is also used as the legend label.
for series_name, series_colour in (('Power_time', 'red'), ('Outages', 'green')):
    p.line(
        x='Date',
        y=series_name,
        source=data,
        legend_label=series_name,
        color=series_colour,
        line_width=2,
        alpha=0.7,
    )

# Legend text, border and background, applied in the order listed.
legend_style = {
    'title': 'legend',
    'label_text_font': "times",
    'label_text_font_style': "italic",
    'label_text_color': "black",
    'border_line_width': 3,
    'border_line_color': "blue",
    'border_line_alpha': 0.2,
    'background_fill_color': "navy",
    'background_fill_alpha': 0.2,
}
for prop, value in legend_style.items():
    setattr(p.legend, prop, value)

show(p)

### A double scatter plot on a row

In [None]:
# Two scatter panels built from the monthly frame and shown side by side.
data = omoku_monthly_supply

# Settings shared by both panels and by both glyph calls.
panel_options = dict(
    x_axis_type='datetime',
    title='omoku power supply',
    x_axis_label='months',
    width=450,
    height=350,
    background_fill_color="#fafafa",
)
marker_options = dict(x='Date', source=data, size=16, alpha=0.8)

# Left panel: hours with power.
s1 = figure(y_axis_label='power_on_duration', **panel_options)
s1.scatter(y='Power_time', marker="circle", color="#53777a", **marker_options)

# Right panel: hours of outage.
s2 = figure(y_axis_label='power_off_duration', **panel_options)
s2.scatter(y='Outages', marker="triangle", color="#c02942", **marker_options)

show(row(children=[s1, s2], sizing_mode="scale_width"))


In [None]:
omoku_weekly

In [None]:
omoku_weekly.index

### making a weekly time series plot with the data frame

In [None]:
# Weekly time series: one line per column of the weekly frame.
data = omoku_weekly

p = figure(
    title='Power supply in Omoku',
    x_axis_type='datetime',
    x_axis_label='WEEKS IN REVIEW',
    y_axis_label='POWER_DURATION(hours)',
    width=1200,
    height=400,
)

# (column, colour) pairs; the column name is also used as the legend label.
for series_name, series_colour in (('Power_time', 'red'), ('Outages', 'green')):
    p.line(
        x='Date',
        y=series_name,
        source=data,
        legend_label=series_name,
        color=series_colour,
        line_width=2,
        alpha=0.7,
    )

# Legend text, border and background, applied in the order listed.
legend_style = {
    'title': 'legend',
    'label_text_font': "times",
    'label_text_font_style': "italic",
    'label_text_color': "black",
    'border_line_width': 3,
    'border_line_color': "blue",
    'border_line_alpha': 0.2,
    'background_fill_color': "navy",
    'background_fill_alpha': 0.2,
}
for prop, value in legend_style.items():
    setattr(p.legend, prop, value)

show(p)