In [None]:
import pandas as pd
# Load PJME_hourly.csv and parse its 'Datetime' column into pandas datetimes.
# NOTE(review): the path is absolute and machine-specific; a configurable data directory
# would let the notebook run elsewhere.
data = pd.read_csv(r"C:\Users\denil\OneDrive\Documents\PJME_hourly.csv").assign(
    Datetime=lambda frame: pd.to_datetime(frame['Datetime'])
)
data.head()

In [None]:
# Month-day ("MM-DD") strings treated as holidays. Add further dates here.
# Built once at definition time so that is_holiday() does a constant-time set lookup
# instead of rebuilding and scanning a list on every call.
# NOTE(review): "04-07" is April 7th in this month-day format; if the 4th of July was
# intended it should read "07-04" -- confirm before relying on it.
_HOLIDAY_MONTH_DAYS = frozenset({
    "01-01", "04-07",
    "05-25", "05-26", "05-27", "05-28", "05-29", "05-30", "05-31",
    "09-01", "09-02", "09-03", "09-04", "09-05", "09-06", "09-07",
    "11-22", "11-23", "11-24", "11-25", "11-26", "11-27", "11-28",
    "12-25",
})


def is_holiday(date):
    """Return True when ``date`` falls on one of the configured holiday month-days.

    Parameters
    ----------
    date : object with a ``strftime`` method
        For example ``datetime.date``, ``datetime.datetime`` or ``pandas.Timestamp``.
        Only its month and day are inspected; the year is ignored.

    Returns
    -------
    bool
        True if the "MM-DD" rendering of ``date`` is in the holiday set.
    """
    return date.strftime("%m-%d") in _HOLIDAY_MONTH_DAYS

# Keep only the rows whose timestamp is not a holiday.
# data["Datetime"] already holds parsed timestamps, so each value is passed straight to
# is_holiday(); re-parsing it with a "%m-%d" format was a no-op that misdescribed the data.
filtered_data_df = data[~data["Datetime"].map(is_holiday)]

# Preview the first rows instead of printing the whole frame.
filtered_data_df.head()

In [None]:
import plotly.express as px

# Add rolling means of the load over 48 and 336 rows as extra columns
# (2 and 14 days if the rows are hourly -- the frame has not been resampled yet).
for column, window in (('MA48', 48), ('MA336', 336)):
    data[column] = data['PJME_MW'].rolling(window).mean()

# Plot the raw series together with both moving averages.
fig = px.line(
    data,
    x="Datetime",
    y=['PJME_MW', 'MA48', 'MA336'],
    title='energy consumption',
    template='plotly_dark',
)
fig.show()

In [None]:
# Drop the moving-average columns so they are not part of the feature frame.
data.drop(['MA48', 'MA336'], axis=1, inplace=True)
# Use the timestamp as the index.
data.set_index('Datetime', drop=True, inplace=True)
# Resample the time series onto an hourly grid.
# NOTE(review): sum() adds together readings that share an hour and yields 0 for an hour
# with no reading at all; confirm that is intended (as opposed to mean() / NaN), because
# such rows would look like extreme values to the detectors.
data = data.resample('H').sum()

# Create calendar features from the index. The vectorised DatetimeIndex accessors replace
# one Python-level attribute lookup per row.
timestamps = data.index
data['day'] = timestamps.day
data['day_name'] = timestamps.day_name()
data['day_of_year'] = timestamps.dayofyear
# ISO week number; isocalendar() returns a nullable UInt32 column, so cast to plain int64.
data['week_of_year'] = timestamps.isocalendar().week.to_numpy(dtype='int64')
data['hour'] = timestamps.hour
# NOTE: despite its name this column is the ISO weekday number (Monday=1 ... Sunday=7),
# not a boolean weekday flag. dayofweek is 0-based, hence the +1.
data['is_weekday'] = timestamps.dayofweek + 1
data.head()

In [None]:
pip install pycaret

In [None]:
from pycaret.anomaly import *
# Initialise the PyCaret anomaly experiment on the feature frame.
# session_id is passed as a fixed value (PyCaret documents it as the experiment's random seed).
s = setup(data=data, session_id=123)

In [None]:
# Display the table returned by PyCaret's models() (the model ids accepted by create_model,
# such as the 'iforest', 'histogram' and 'lof' used below).
models()

In [None]:
# Fit the 'iforest' (Isolation Forest) detector; fraction=0.1 is handed to PyCaret as the
# expected share of outliers (see the create_model documentation).
iforest = create_model(model='iforest', fraction=0.1)
# Attach the model's output to the data; the 'Anomaly' column is what the next cells read.
iforest_results = assign_model(model=iforest)
iforest_results.head()

In [None]:
# First rows that the Isolation Forest flagged (Anomaly == 1).
iforest_results.loc[iforest_results['Anomaly'].eq(1)].head()

In [None]:
import plotly.graph_objects as go

# Line chart: load on the y-axis, timestamp (the index) on the x-axis.
fig = px.line(iforest_results, x=iforest_results.index, y="PJME_MW", title='Energy consumption - Isolation Forest', template = 'plotly_dark')

# Rows flagged by the model.
is_anomaly = iforest_results['Anomaly'] == 1
# Timestamps of the anomalies.
outlier_dates = iforest_results[is_anomaly].index
# Load at those timestamps, selected in one pass with the same mask
# instead of one .loc row lookup per anomaly.
y_values = iforest_results.loc[is_anomaly, "PJME_MW"].tolist()

# Overlay the anomalies as red markers.
fig.add_trace(go.Scatter(x=outlier_dates, y=y_values, mode = 'markers',
                name = 'Anomaly',
                marker=dict(color='red',size=10)))

fig.show()

In [None]:
# Fit the 'histogram' (histogram-based outlier detection) model; fraction=0.1 is handed to
# PyCaret as the expected share of outliers (see the create_model documentation).
histogram = create_model(model='histogram', fraction=0.1)
# Attach the model's output to the data; the 'Anomaly' column is what the next cells read.
histogram_results = assign_model(model=histogram)
histogram_results.head()

In [None]:
# First rows that the histogram-based detector flagged (Anomaly == 1).
histogram_results.loc[histogram_results['Anomaly'].eq(1)].head()

In [None]:
import plotly.graph_objects as go

# Line chart: load on the y-axis, timestamp (the index) on the x-axis.
fig = px.line(histogram_results, x=histogram_results.index, y="PJME_MW", title='Energy consumption -Histogram based outlier detection ', template = 'plotly_dark')

# Rows flagged by the model.
is_anomaly = histogram_results['Anomaly'] == 1
# Timestamps of the anomalies.
outlier_dates = histogram_results[is_anomaly].index
# Load at those timestamps, selected in one pass with the same mask
# instead of one .loc row lookup per anomaly.
y_values = histogram_results.loc[is_anomaly, "PJME_MW"].tolist()

# Overlay the anomalies as red markers.
fig.add_trace(go.Scatter(x=outlier_dates, y=y_values, mode = 'markers',
                name = 'Anomaly',
                marker=dict(color='red',size=10)))

fig.show()

In [None]:
# Fit the 'lof' (Local Outlier Factor) model; fraction=0.1 is handed to PyCaret as the
# expected share of outliers (see the create_model documentation).
lof = create_model(model='lof', fraction=0.1)
# Attach the model's output to the data; the 'Anomaly' column is what the next cells read.
lof_results = assign_model(model=lof)
lof_results.head()

In [None]:
# First rows that the Local Outlier Factor model flagged (Anomaly == 1).
lof_results.loc[lof_results['Anomaly'].eq(1)].head()

In [None]:
import plotly.graph_objects as go

# Line chart: load on the y-axis, timestamp (the index) on the x-axis.
fig = px.line(lof_results, x=lof_results.index, y="PJME_MW", title='Energy consumption - Local outlier factor', template = 'plotly_dark')

# Rows flagged by the model.
is_anomaly = lof_results['Anomaly'] == 1
# Timestamps of the anomalies.
outlier_dates = lof_results[is_anomaly].index
# Load at those timestamps, selected in one pass with the same mask
# instead of one .loc row lookup per anomaly.
y_values = lof_results.loc[is_anomaly, "PJME_MW"].tolist()

# Overlay the anomalies as red markers.
fig.add_trace(go.Scatter(x=outlier_dates, y=y_values, mode = 'markers',
                name = 'Anomaly',
                marker=dict(color='red',size=10)))

fig.show()

In [None]:
# Show the anomaly timestamps as last assigned (by the Local Outlier Factor plot cell when
# the notebook is run top to bottom).
outlier_dates

In [None]:
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px

# Store the lower-cased weekday name of every row; the weekend test below and the later
# `new_df` cell compare against lower-case names.
lof_results['day_name'] = lof_results.index.day_name().str.lower()

# LOF anomalies that do not fall on a Saturday or Sunday.
is_weekday_anomaly = (lof_results['Anomaly'] == 1) & ~lof_results['day_name'].isin(['saturday', 'sunday'])
filtered_outlier_dates = lof_results[is_weekday_anomaly].index

# Line chart: load on the y-axis, timestamp (the index) on the x-axis.
fig = px.line(lof_results, x=lof_results.index, y="PJME_MW", title='Energy consumption - Local outlier factor', template='plotly_dark')

# Load at the filtered anomaly timestamps, selected in one pass with the same mask
# instead of one .loc row lookup per anomaly.
filtered_y_values = lof_results.loc[is_weekday_anomaly, "PJME_MW"].tolist()
fig.add_trace(go.Scatter(x=filtered_outlier_dates, y=filtered_y_values, mode='markers',
                name='Anomaly',
                marker=dict(color='red', size=10)))

fig.show()

In [None]:
# Show the timestamps of the LOF anomalies that remain after excluding Saturdays and Sundays.
filtered_outlier_dates

In [None]:
# Rows of lof_results that are flagged as anomalies and do not fall on a weekend.
# day_name is lower-cased here before the comparison, so the weekend exclusion does not
# silently stop working when the column still holds capitalised names ('Saturday').
is_weekend = lof_results['day_name'].str.lower().isin(['saturday', 'sunday'])
new_df = lof_results[(lof_results['Anomaly'] == 1) & ~is_weekend]
new_df

In [None]:
# Keep the index (row labels) of the weekday anomalies in `index` and print it.
index = new_df.index
print(index)

In [None]:
import pandas as pd

# From the weekday anomalies in `new_df`, keep only the rows whose PJME_MW value is
# above 45000 or below 20000.
# .copy() makes `result` an independent frame: later cells modify it, and without the copy
# those edits would act on a slice of new_df (SettingWithCopyWarning).
result = new_df[(new_df['PJME_MW'] > 45000) | (new_df['PJME_MW'] < 20000)].copy()
result

In [None]:
# Move the index into a regular column so later cells can read result['Datetime'].
# Rebinding (instead of inplace=True) avoids modifying a slice in place, and the guard keeps
# a second run of this cell from adding a stray 'index' column.
if 'Datetime' not in result.columns:
    result = result.reset_index()
result

In [None]:
import pandas as pd

# Add a 'date_only' column holding the calendar date of each 'Datetime' value.
# assign() builds a new frame rather than writing a column into what may be a slice of
# another frame, which is what triggers SettingWithCopyWarning.
result = result.assign(date_only=result['Datetime'].dt.date)
result


In [None]:
import pandas as pd

# Pull the 'date_only' column out of `result` as a one-column DataFrame.
column_name = 'date_only'
ne_dataframe = result[column_name].to_frame()

# Same frame with a fresh 0..n-1 index (the old index is discarded).
ne_dataframe_reset = ne_dataframe.reset_index(drop=True)

In [None]:
# `a` is a second name for the same DataFrame object as ne_dataframe_reset (no copy is made),
# so in-place changes made through `a` are visible through ne_dataframe_reset as well.
a=ne_dataframe_reset

In [None]:
# Count the rows that repeat an earlier row (i.e. dates that appear more than once).
a.duplicated().sum()

In [None]:
# Remove repeated rows in place, keeping the first occurrence of each date.
# NOTE(review): because `a` was bound to ne_dataframe_reset without a copy, this also
# changes ne_dataframe_reset.
a.drop_duplicates(inplace=True)

In [None]:
# Display the de-duplicated dates.
a

In [None]:
import pandas as pd
from IPython.display import FileLink

# Destination of the exported CSV, relative to the notebook's working directory.
output_file_path = 'output.csv'

# Write the frame `a` to disk; index=False leaves the row index out of the file.
a.to_csv(output_file_path, index=False)

# Render a link to the written file as the cell's output.
FileLink(output_file_path)