# Data loading:

In [None]:
# import our own data reader for txt 
import txtreader as tr

# import pandas for structuring the data
import pandas as pd

# import numpy for numerical analysis
import numpy as np

import os

In [None]:
# Getting the path of the txt file, relative to the current working directory.
# os.path.join inserts the separator of the host OS; hard-coded '\\' separators
# only resolve on Windows.
cwd = os.getcwd()
path = os.path.join(cwd, 'data', 'aadr.us.txt')

In [None]:
# Load the txt data file with our own reader (txtreader, imported as tr).
# read_txt returns two values, unpacked here as the file's metadata and its
# text/content.
metadata, text = tr.read_txt(path)

In [None]:
# File metadata:
metadata

In [None]:
# File text/content:
text

In [None]:
# Start from an empty DataFrame that only declares the three columns
# (filetype, meta, text); rows are added later, one per ingested file.
column_names = ['filetype', 'meta', 'text']
df = pd.DataFrame(columns=column_names)

In [None]:
# Verify the columns have been created:
df

In [None]:
# All the txt files:
in_txt = [path]

In [None]:
# Read every txt file: keep its content in `ingestable` and record one
# row (filetype, metadata, content) per file in the DataFrame.
ingestable = []
for uri in in_txt:
    mymeta, text = tr.read_txt(uri)
    ingestable.append(text)

    # The next free row label equals the current number of rows.
    next_row = len(df)
    df.loc[next_row] = ['txt', mymeta, text]
ingestable

In [None]:
df 

In [None]:
# Pick the content of the first ingested file (row label 0, column 'text').
df_text = df.loc[0, 'text']

In [None]:
df_text.head()

In [None]:
df_text.shape

In [None]:
df_text.columns

# Exploring and Cleaning Data

In [None]:
# Count the missing values in DataFrame
df_text.isnull().sum()

In [None]:
# Whole row with None
df_text.iloc[1565]

In [None]:
# Dropping the OpenInt column because it contains only zeros:
df_text = df_text.drop(columns=['OpenInt'])

# Dropping the whole row labelled 1565 because all of its values are None:
df_text = df_text.drop(index=1565)

In [None]:
# Empty/irrelevant data has now been cleaned/removed:
df_text

In [None]:
# Make sure the price and volume columns hold numeric values
# (pd.to_numeric leaves already-numeric columns as they are).
for column in ('Open', 'Close', 'High', 'Low', 'Volume'):
    df_text[column] = pd.to_numeric(df_text[column])

In [None]:
# Adding an additional column:
# The daily percent change from market open to market close, measured
# relative to the opening price. The value is positive when the stock closed
# above its open and negative when it closed below it.
df_text['Change%'] = (df_text['Close'] - df_text['Open']) / df_text['Open'] * 100

In [None]:
df_text

In [None]:
# Types of attributes
df_text.dtypes

In [None]:
# Insights of the value scope
df_text.describe()

In [None]:
# Average of the daily Change% column and of the traded Volume column.
change_procent = df_text['Change%'].mean()
avg = df_text['Volume'].mean()

# One print call, one value per line.
print(f'change_procent: {change_procent}', f'avg: {avg}', sep='\n')

# Visualisation

In [None]:
# Other utilities
from sklearn import datasets, preprocessing, metrics

import matplotlib.pyplot as plt

In [None]:
# Line chart of the stock's closing price against the trading date
# (2010 - 2017), drawn through the explicit Figure/Axes interface.

fig, ax = plt.subplots(figsize=(10, 6))
dates = pd.to_datetime(df_text['Date'])
ax.plot(dates, df_text['Close'])
ax.set_title('Close Prices Over Time (2010-2017)')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.grid(True)
fig.tight_layout()
plt.show()