In [None]:
import numpy as np                 # Importing the numpy library for numerical operations       
import pandas as pd                # Importing the pandas library for data manipulation       
import matplotlib.pyplot as plt    # Importing the matplotlib library for data visualization
from sklearn.model_selection import train_test_split    # Importing the train_test_split function from scikit-learn for splitting data
from sklearn.linear_model import LogisticRegression     # Importing the LogisticRegression model from scikit-learn
from sklearn.preprocessing import StandardScaler        # Importing the StandardScaler for data normalization

In [None]:
# Read the two input CSV files into DataFrames (comma is pandas' default delimiter).
case = pd.read_csv('example.csv')
time = pd.read_csv('example1.csv')

# Preview the first rows of 'case' as this cell's output.
case.head()

In [None]:
def _aggregate_by_date(frame):
    """Collapse ``frame`` to one row per 'date'.

    For every date the first 'weekday' value is kept and the 'time' and
    'status' columns are summed. 'date' is returned as a regular column
    (the index is reset).
    """
    return (
        frame.groupby('date')
        .agg({'weekday': 'first', 'time': 'sum', 'status': 'sum'})
        .reset_index()
    )


# Per-date aggregates of the two input frames.
date_grouped = _aggregate_by_date(case)
time_grouped = _aggregate_by_date(time)

# Rebind 'case' and 'time' to independent copies of their per-date aggregates.
# The two frames stay separate: nothing is concatenated together here.
case = date_grouped.copy()
time = time_grouped.copy()

In [None]:
# Destination files for the per-date 'case' and 'time' frames.
new_file_path, new_file_path1 = 'case1.csv', 'time1.csv'

# Write each frame to its CSV file, leaving the row index out of the output.
case.to_csv(path_or_buf=new_file_path, index=False)
time.to_csv(path_or_buf=new_file_path1, index=False)

print("Data appended and saved to a new file successfully!")

In [None]:
# Read the saved CSV files back into fresh DataFrames (default comma delimiter).
case1 = pd.read_csv('case1.csv')
time1 = pd.read_csv('time1.csv')

In [None]:
# Parse 'date' into datetimes, then derive calendar columns from it.
# Each callable receives the frame including the columns assigned before it,
# so 'month', 'year' and 'day_of_week' are computed from the parsed dates.
case1 = case1.assign(
    date=lambda df: pd.to_datetime(df['date']),
    month=lambda df: df['date'].dt.month,
    year=lambda df: df['date'].dt.year,
    day_of_week=lambda df: df['date'].dt.dayofweek,
)

In [None]:
# Parse 'date' in 'time1' into datetimes, then derive calendar columns from it.
# Each callable receives the frame including the columns assigned before it,
# so 'month', 'year' and 'day_of_week' are computed from the parsed dates.
time1 = time1.assign(
    date=lambda df: pd.to_datetime(df['date']),
    month=lambda df: df['date'].dt.month,
    year=lambda df: df['date'].dt.year,
    day_of_week=lambda df: df['date'].dt.dayofweek,
)

In [None]:
# Aggregation shared by both yearly summaries: keep the year label and
# total up 'time' and 'status' within each year.
yearly_agg_spec = {'year': 'first', 'time': 'sum', 'status': 'sum'}

# One row per year for 'case1' and for 'time1'; 'year' stays a regular column.
casetime = case1.groupby('year', as_index=False).agg(yearly_agg_spec)
totaltime = time1.groupby('year', as_index=False).agg(yearly_agg_spec)

In [None]:
# Outer-join 'case1' and 'time1' on 'date' so dates present in only one of
# them are kept. Columns that exist in both inputs receive the '_case' and
# '_time' suffixes, so every lookup below must use exactly those suffixes.
merged_df = pd.merge(case1, time1, on='date', suffixes=('_case', '_time'), how='outer')

# 'time': prefer the value from 'time1' and fall back to 'case1' where it is missing.
merged_df['time'] = merged_df['time_time'].combine_first(merged_df['time_case'])
# 'status' comes from 'case1' only, so it is NaN for dates found only in 'time1'.
merged_df['status'] = merged_df['status_case']

# 'weekday', 'year' and 'month': take the 'case1' value, falling back to 'time1'.
merged_df['weekday'] = merged_df['weekday_case'].combine_first(merged_df['weekday_time'])
merged_df['year'] = merged_df['year_case'].combine_first(merged_df['year_time'])
merged_df['month'] = merged_df['month_case'].combine_first(merged_df['month_time'])

# Keep only the consolidated columns; the suffixed helper columns are dropped.
merged_df = merged_df[['date', 'year', 'month', 'weekday', 'time', 'status']]

# Show the first 30 rows as the cell output.
merged_df.head(30)

In [None]:
# Mean 'time' and 'status' for every (year, weekday) combination.
daily = merged_df.groupby(['year', 'weekday'], as_index=False).agg({'time': 'mean', 'status': 'mean'})
# Only the final expression of a cell is rendered automatically, so this
# preview is shown explicitly instead of being silently discarded.
display(daily.head(30))

# Yearly totals of 'time' and 'status'; 'year' stays a regular column.
mergedtime = merged_df.groupby('year', as_index=False).agg({'year': 'first', 'time': 'sum', 'status': 'sum'})
mergedtime.head()

In [None]:
# Import seaborn for data visualization and configure settings
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

# Set inline plotting for Jupyter Notebooks
%matplotlib inline

# Pairplot to visualize relationships between variables, color-coded by 'weekday'
sns.pairplot(merged_df, hue="weekday")
plt.show()    #Displays the plot

In [None]:
# One figure with two side-by-side panels.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# (column to plot, panel title, upper y-axis limit) for each panel, left to right.
daily_panels = [
    ('time', 'Average Daily Time by Year', 10),
    ('status', 'Average Daily Closes by Year', 15),
]

for ax, (column, title, y_max) in zip(axes, daily_panels):
    # Bars of `column` per 'year' from merged_df, colored by 'year'.
    sns.barplot(data=merged_df, x='year', y=column, hue='year', ax=ax)
    ax.set_title(title)
    ax.set_ylim(0, y_max)
    # Anchor the legend just inside the upper-right corner of the panel.
    ax.legend(loc='upper right', bbox_to_anchor=(0.9, 0.925))

# Tighten spacing so the panels do not overlap, then render the figure.
plt.tight_layout()
plt.show()

In [None]:
# Two side-by-side bar charts of the yearly totals held in mergedtime.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# (column to plot, panel title, upper y-axis limit) for each panel, left to right.
total_panels = [
    ('time', 'Time by Year', 1250),
    ('status', 'Closes by Year', 2500),
]

for ax, (column, title, y_max) in zip(axes, total_panels):
    # Bars of `column` per 'year' from mergedtime, colored by 'year'.
    sns.barplot(data=mergedtime, x='year', y=column, hue='year', ax=ax)
    ax.set_title(title)
    ax.set_ylim(0, y_max)
    # Anchor the legend just inside the upper-right corner of the panel.
    ax.legend(loc='upper right', bbox_to_anchor=(0.9, 0.925))

# Tighten spacing so the panels do not overlap, then render the figure.
plt.tight_layout()
plt.show()