In [None]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Paths of the monthly report CSVs to load.
# The directory is written as a raw string so every backslash is literal; the
# earlier literals mixed "\\" with the invalid escapes "\D" and "\m", which
# recent Python versions flag with a SyntaxWarning.
REPORT_DIR = r"C:\Users\Dovid Glassner\Downloads"
REPORT_NUMBERS = range(1, 5)  # file-name suffixes 1-4 (presumably months -- confirm)
REPORT_YEAR = 2022
files = [
    f"{REPORT_DIR}\\monthly_report.{number}.{REPORT_YEAR}.csv"
    for number in REPORT_NUMBERS
]

In [None]:
# Accumulator for the report paths that are actually present on disk
valid_files = list()

In [4]:
# Keep only the report paths that point at a real file and report the rest.
# The list is rebuilt from scratch here so that re-running this cell cannot
# append the same paths a second time (which would duplicate every row once
# the files are concatenated).
valid_files = []
for path in files:
    # isfile() rather than exists(): a directory at that path could not be
    # read as a CSV anyway.
    if os.path.isfile(path):
        valid_files.append(path)
    else:
        print(f"File not found: {path}")

In [5]:
# Read every available report and stack them into one DataFrame.
# Fail loudly when nothing can be loaded: merely printing a message would
# leave `df` undefined (or stale from an earlier run) and the following cells
# would break with a less helpful error.
if not valid_files:
    raise FileNotFoundError("No valid files to concatenate.")

# ignore_index=True gives the combined frame a fresh 0..n-1 index instead of
# repeating each file's own row labels.
df = pd.concat(
    [pd.read_csv(path) for path in valid_files],
    ignore_index=True,
)

In [6]:
# No-op: `df` already holds the concatenated monthly reports at this point.
# This self-assignment loads nothing and leaves `df` unchanged.
df = df

In [7]:
# Display the column labels of the combined report frame
df.columns

Index(['Department', 'Search Term', 'Search Frequency Rank', '#1 Clicked ASIN',
       '#1 Product Title', '#1 Click Share', '#1 Conversion Share',
       '#2 Clicked ASIN', '#2 Product Title', '#2 Click Share',
       '#2 Conversion Share', '#3 Clicked ASIN', '#3 Product Title',
       '#3 Click Share', '#3 Conversion Share'],
      dtype='object')

In [8]:
# Names of the three "clicked ASIN" columns (positions #1 to #3)
asin_columns = [f'#{position} Clicked ASIN' for position in (1, 2, 3)]

In [9]:
# Flatten the three ASIN columns into one long Series, discarding missing entries
asin_frame = df[asin_columns]
asin_stacked = asin_frame.stack()
asin_series = asin_stacked.dropna()

In [10]:
# Tally how many times each ASIN occurs, most frequent first
# (sort/ascending are spelled out here; they are the value_counts defaults)
asin_counts = asin_series.value_counts(sort=True, ascending=False)

In [None]:
# Write the ASIN tally to the cell output as plain text
asin_report = str(asin_counts)
print(asin_report)

In [None]:
# Restrict the data to a specific Search Frequency Rank range.
# Non-numeric rank values are first coerced to NaN, so they fail both range
# comparisons and are filtered out.
RANK_COLUMN = 'Search Frequency Rank'
RANK_MIN = 200000
RANK_MAX = 300000

df[RANK_COLUMN] = pd.to_numeric(df[RANK_COLUMN], errors='coerce')
rank_in_range = (df[RANK_COLUMN] >= RANK_MIN) & (df[RANK_COLUMN] <= RANK_MAX)

# NOTE(review): dropna() removes rows with a missing value in ANY column, not
# only in the rank column -- confirm that is intended.
df = df[rank_in_range].dropna()

In [None]:
# Keep the ASINs that appear more than 5 times but at most 10 times.
# (The previous comment said "less than or equal to 20" while the code used
# 10; the executed bound is kept and named so comment and code cannot drift.)
# NOTE(review): asin_counts was computed before `df` was restricted to the
# rank range, so these counts cover all loaded rows -- confirm that is intended.
MIN_ASIN_COUNT_EXCLUSIVE = 5
MAX_ASIN_COUNT_INCLUSIVE = 10
asin_counts = asin_counts[
    (asin_counts > MIN_ASIN_COUNT_EXCLUSIVE)
    & (asin_counts <= MAX_ASIN_COUNT_INCLUSIVE)
]

In [None]:
# Bar chart of how often each remaining ASIN appears
asin_ax = asin_counts.plot(kind='bar', title='ASIN Frequency')
asin_ax.set_xlabel('ASIN')
asin_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Group the data by keyword and count how many rows each keyword has.
# The frame has no 'ASIN' column (grouping on it raised KeyError); the keyword
# lives in the 'Search Term' column.
KEYWORD_COLUMN = 'Search Term'
keyword_counts = df.groupby(KEYWORD_COLUMN)[KEYWORD_COLUMN].count()

# Plot the per-keyword frequency
keyword_ax = keyword_counts.plot(title='Keyword Trend')
keyword_ax.set_xlabel('Keyword')
keyword_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Fit an ARIMA(1, 0, 1) model to the keyword counts and project 12 steps ahead.
# NOTE(review): keyword_counts comes from a groupby count, so its index holds
# group keys rather than dates; reading the 12 forecast steps as "months" is an
# assumption to confirm.
ARIMA_ORDER = (1, 0, 1)
FORECAST_STEPS = 12

keyword_model = sm.tsa.ARIMA(keyword_counts, order=ARIMA_ORDER)
arima_model = keyword_model.fit()
forecast = arima_model.forecast(steps=FORECAST_STEPS)

In [None]:
# Line chart of the values produced by the ARIMA forecast
forecast_ax = forecast.plot(title='Forecasted Keyword Trend')
forecast_ax.set_xlabel('Month')
forecast_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Train a decision tree that predicts the search-frequency rank of a row.
# The frame has no 'ASIN' or 'Rank' columns (the old drop/lookup raised
# KeyError); the rank lives in 'Search Frequency Rank'. The remaining columns
# are mostly text, which the tree cannot consume, so only the six click /
# conversion share columns are used as features.
TARGET_COLUMN = 'Search Frequency Rank'
SHARE_COLUMNS = [
    f'#{position} {metric} Share'
    for position in (1, 2, 3)
    for metric in ('Click', 'Conversion')
]

# NOTE(review): the share columns may be stored either as numbers or as text
# such as "12.5%" -- confirm against the CSVs. Going through str, stripping a
# trailing '%', and coercing to numeric handles both; anything unparseable
# becomes 0.
x = (
    df[SHARE_COLUMNS]
    .astype(str)
    .apply(lambda column: column.str.rstrip('%'))
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)
y = df[TARGET_COLUMN]

# Fixed random_state so repeated runs build the same tree.
regressor = DecisionTreeRegressor(random_state=0).fit(x, y)

In [None]:
# Predict a rank for every row of the feature matrix.
# Note: `x` is the same matrix the regressor was fitted on, so these are
# in-sample predictions.
asin_rank_predictions = regressor.predict(x)

# Plot the predicted rank against each row's ASIN.
# The frame has no plain 'ASIN' column (df['ASIN'] raised KeyError).
# NOTE(review): '#1 Clicked ASIN' is used as the row's ASIN -- confirm that
# this is the intended identifier.
ASIN_COLUMN = '#1 Clicked ASIN'
plt.scatter(df[ASIN_COLUMN], asin_rank_predictions)
plt.title('Predicted ASIN Rankings')
plt.xlabel('ASIN')
plt.ylabel('Rank')
plt.show()
