In [None]:
# BLOCK 1
# first import the necessary packages and modules
from utils import *
from logic import *
from notebook import psource
import numpy as np
import skfuzzy as fuzz
import matplotlib.pyplot as plt
from skfuzzy import control as ctrl
import pandas as pd
import seaborn as sns
import re
import nltk
import discord
import os
import nest_asyncio
import requests
import json
import random
nest_asyncio.apply()
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import pickle

In [None]:
# BLOCK 2
# load the Christmas songs dataset from disk
song_data = pd.read_csv('ChristmasSongs.csv')

# count the missing cells per column
song_data.isna().sum()

In [None]:
# BLOCK 3
# keep only the rows that have a 'Description', then renumber the index from 0
song_data = song_data.dropna(subset=['Description']).reset_index(drop=True)

# re-check the missing-cell counts after the clean-up
song_data.isna().sum()

In [None]:
# BLOCK 4
# range of release years present in the dataset
min_year, max_year = song_data['Year'].min(), song_data['Year'].max()

print(f"Minimum Year: {min_year}", f"Maximum Year: {max_year}", sep="\n")

In [None]:
# BLOCK 5
# map a release year onto one of the song eras
def classify_year(year):
    """Return the era label for ``year``.

    The inclusive ranges are 1920-1950 ('Vintage Songs'), 1951-1980
    ('Classic Songs') and 1981-2023 ('Modern Songs'). Anything that falls
    outside every range yields 'Unknown'.
    """
    era_ranges = (
        (1920, 1950, 'Vintage Songs'),
        (1951, 1980, 'Classic Songs'),
        (1981, 2023, 'Modern Songs'),
    )
    for first_year, last_year, era in era_ranges:
        if first_year <= year <= last_year:
            return era
    return 'Unknown'
    
# derive the 'Era' label of every song from its release year
song_data['Era'] = song_data['Year'].map(classify_year)

# show the dataset with the new column
song_data

In [None]:
# BLOCK 6
# one hot encoding for the input column
# Every year keeps its own indicator column (no drop_first): the prediction code
# reindexes unseen years to an all-zero row, and with drop_first=True that row
# would be indistinguishable from the earliest year in the dataset.
# NOTE(review): one-hot encoded years cannot generalise to years absent from the
# training data — consider feeding the numeric year instead.
year_dummies = pd.get_dummies(song_data['Year'], prefix='Year')

# replace the raw 'Year' column with its one-hot encoded columns
song_data = pd.concat([song_data.drop(columns='Year'), year_dummies], axis=1)

song_data.head()

In [None]:
# BLOCK 7
# the one-hot encoded columns all share the 'Year' prefix, so select them by regex
year_columns = song_data.filter(regex='^Year').columns

# feature matrix built from the one-hot year columns
x_year = song_data.loc[:, year_columns].values

# x is the model input, y is the era label to predict
x, y = x_year, song_data['Era'].values

In [None]:
# BLOCK 8
# hold out part of the data for evaluation
from sklearn.model_selection import train_test_split

# 80% train / 20% test, seeded so the split is repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=100,
    test_size=0.2,
)

In [None]:
# BLOCK 9
# candidate 1: Gaussian Naive Bayes classifier
# (each training cell rebinds `model`, so the last one executed is what gets evaluated)
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(X=x_train, y=y_train)

In [None]:
# BLOCK 10
# candidate 2: Logistic Regression classifier with default settings
# (each training cell rebinds `model`, so the last one executed is what gets evaluated)
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X=x_train, y=y_train)

In [None]:
# BLOCK 11
# candidate 3: shallow Decision Tree classifier (gini, depth 3, at least 3 samples per leaf)
# (each training cell rebinds `model`, so the last one executed is what gets evaluated)
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(
    criterion='gini',
    max_depth=3,
    min_samples_leaf=3,
    random_state=100,
)
model.fit(X=x_train, y=y_train)

In [None]:
# BLOCK 12
# score whichever `model` was fitted last on the held-out split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

y_pred = model.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy}")
print(f"Classification Report:\n {classification_report(y_test, y_pred)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test, y_pred)}")

In [None]:
# BLOCK 13
import pickle

# file names for the persisted song-era classifier and its one-hot column layout
model_pkl_file = "song_classifier_model.pkl"
year_columns_pkl_file = "year_columns.pkl"

# persist the fitted classifier
with open(model_pkl_file, mode='wb') as file:
    pickle.dump(model, file)

# persist the training columns so new inputs can be reindexed to the same layout
with open(year_columns_pkl_file, mode='wb') as file:
    pickle.dump(year_columns, file)

In [None]:
# BLOCK 14
# reload the persisted song-era classifier and its one-hot column layout
model_pkl_file = "song_classifier_model.pkl"
year_columns_pkl_file = "year_columns.pkl"

with open(model_pkl_file, mode='rb') as file:
    model = pickle.load(file)
with open(year_columns_pkl_file, mode='rb') as file:
    year_columns = pickle.load(file)

# single-row frame holding the year to classify, given as a string
data_to_predict = pd.DataFrame({"Year": ["2000"]})

# one-hot encode the row, then align it with the training columns
# (columns the row does not have are filled with 0)
data_encoded = pd.get_dummies(data_to_predict)
data_encoded = data_encoded.reindex(fill_value=0, columns=year_columns)

# predict the era for the sample year and show it
predictions = model.predict(data_encoded)
print(predictions)

In [None]:
# BLOCK 15
# load the labelled questions used to train the category classifier
category_data = pd.read_csv('Category.csv')

# count the missing cells per column
category_data.isna().sum()

In [None]:
# BLOCK 16
# apply NLP clean-up to the 'Questions' column
# Iterate over the column itself rather than a hard-coded row count, so the corpus
# always has exactly one entry per row of category_data (the labels come from the same frame).
corpus = []
for description in category_data['Questions']:
    # replace every non-alphabetic character with a space
    description = re.sub('[^a-zA-Z]', ' ', description)
    # lowercase, then split and re-join to collapse runs of whitespace
    description = ' '.join(description.lower().split())
    corpus.append(description)

In [None]:
# BLOCK 17
# create Bag of Words (BoW) model
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=1500)

# setting the x (input) and y (output) respectively
# fit_transform learns the vocabulary and encodes the corpus in one pass,
# so no separate cv.fit(corpus) call is needed beforehand
x = cv.fit_transform(corpus).toarray()
y = category_data['Category']

In [None]:
# BLOCK 18
# hold out part of the questions for evaluation
from sklearn.model_selection import train_test_split

# 80% train / 20% test, seeded so the split is repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=100,
    test_size=0.2,
)

# (rows, features) of the training matrix
print(x_train.shape)

In [None]:
# BLOCK 19
# candidate 1: Gaussian Naive Bayes classifier
# (each training cell rebinds `model`, so the last one executed is what gets evaluated)
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(X=x_train, y=y_train)

In [None]:
# BLOCK 20
# candidate 2: Logistic Regression classifier with default settings
# (each training cell rebinds `model`, so the last one executed is what gets evaluated)
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X=x_train, y=y_train)

In [None]:
# BLOCK 21
# candidate 3: shallow Decision Tree classifier (gini, depth 3, at least 3 samples per leaf)
# (each training cell rebinds `model`, so the last one executed is what gets evaluated)
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(
    criterion='gini',
    max_depth=3,
    min_samples_leaf=3,
    random_state=100,
)
model.fit(X=x_train, y=y_train)

In [None]:
# BLOCK 22
# score whichever `model` was fitted last on the held-out questions
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

y_pred = model.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy}")
print(f"Classification Report:\n {classification_report(y_test, y_pred)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test, y_pred)}")

In [None]:
# BLOCK 23
# file names for the persisted question-category classifier and its vectorizer
model_pkl_file = "category_classifier.pkl"
vectorizer_pkl_file = "vectorizer.pkl"

# persist the fitted classifier
with open(model_pkl_file, mode='wb') as file:
    pickle.dump(model, file)

# persist the fitted vectorizer so new text is encoded with the same vocabulary
with open(vectorizer_pkl_file, mode='wb') as file:
    pickle.dump(cv, file)

In [None]:
# BLOCK 24
# reload the persisted question classifier and its fitted vectorizer
model_pkl_file = "category_classifier.pkl"
vectorizer_pkl_file = "vectorizer.pkl"

with open(model_pkl_file, mode='rb') as file:
    model = pickle.load(file)
with open(vectorizer_pkl_file, mode='rb') as file:
    cv = pickle.load(file)

# single-row frame holding the sample question to classify
data_to_predict = pd.DataFrame({'Question': ["What is the era of a song from 1970 classified as?"]})

# same clean-up as the training corpus: letters only, lowercase, single spaces
cleaned_data = re.sub('[^a-zA-Z]', ' ', data_to_predict['Question'][0])
cleaned_data = ' '.join(cleaned_data.lower().split())
corpus2 = [cleaned_data]

# encode the cleaned question with the fitted vocabulary
test_data = cv.transform(corpus2).toarray()

# predict the category of the sample question and show it
predictions = model.predict(test_data)
print(predictions)

In [None]:
# BLOCK 25
# apply fuzzy logic to calculate the temperature of weather

# set the antecedent (crisp input: temperature on a 0..110 universe)
temp = ctrl.Antecedent(np.arange(0, 111, 1), 'temperature')

# set the consequent (output: weather category score on a 0..110 universe)
category = ctrl.Consequent(np.arange(0, 111, 1), 'weather_category')

# input membership functions
# NOTE(review): the universe runs to 110 but 'high' ends at 100, so inputs in
# (100, 110] belong to no term — confirm this is intended.
temp['low'] = fuzz.trapmf(temp.universe, [0, 0, 20, 40])
temp['moderate'] = fuzz.trimf(temp.universe, [20, 50, 80])
temp['high'] = fuzz.trapmf(temp.universe, [60, 80, 100, 100])

# output membership functions, defined on the consequent's own universe so they
# stay correct if the two universes are ever changed independently
category['cold'] = fuzz.trapmf(category.universe, [0, 0, 20, 40])
category['warm'] = fuzz.trimf(category.universe, [20, 50, 80])
category['hot'] = fuzz.trapmf(category.universe, [60, 80, 100, 100])

In [None]:
# BLOCK 26
# one rule per temperature term: low -> cold, moderate -> warm, high -> hot
rule1 = ctrl.Rule(antecedent=temp['low'], consequent=category['cold'])
rule2 = ctrl.Rule(antecedent=temp['moderate'], consequent=category['warm'])
rule3 = ctrl.Rule(antecedent=temp['high'], consequent=category['hot'])

# bundle the rules into a control system and build a simulation that can be fed inputs
weather_ctrl = ctrl.ControlSystem(rules=[rule1, rule2, rule3])
weather_system = ctrl.ControlSystemSimulation(weather_ctrl)

In [None]:
# BLOCK 27
# persist the fuzzy weather simulation so the bot functions can reload it from disk
weather_system_pkl_file = "weather_system.pkl"

with open(weather_system_pkl_file, mode='wb') as file:
    pickle.dump(weather_system, file)

In [None]:
# BLOCK 28
# all functions

# api call to return current weather status
def get_temperature():
    """Return the current temperature reported by OpenWeatherMap for "penang,mys".

    The request asks for metric units. The API key is read from the
    ``OPENWEATHERMAP_API_KEY`` environment variable and falls back to the
    ``"YOUR_API_KEY"`` placeholder when the variable is not set.

    Raises:
        requests.RequestException: if the request times out, cannot be sent,
            or the server answers with an HTTP error status.
    """
    api_key = os.environ.get("OPENWEATHERMAP_API_KEY", "YOUR_API_KEY")
    # https so the API key is not sent in clear text
    weatherMapUrl = "https://api.openweathermap.org/data/2.5/weather"
    query = {"q": "penang,mys", "appid": api_key, "units": "metric"}

    # let requests build and escape the query string; the timeout keeps the bot
    # from hanging forever on a stalled connection
    response = requests.get(weatherMapUrl, params=query, timeout=10)
    # fail with a clear HTTP error (e.g. invalid key) instead of a KeyError on 'main'
    response.raise_for_status()

    temperature = response.json()['main']['temp']
    return temperature

def predict_weather():
    """Return a sentence describing how the current temperature feels.

    Reloads the pickled fuzzy simulation from "weather_system.pkl", feeds it
    the temperature from ``get_temperature()`` and maps the computed
    'weather_category' output to cold (< 33.33), warm (< 66.66) or hot.
    """
    with open("weather_system.pkl", 'rb') as pkl:
        simulation = pickle.load(pkl)

    temperature = get_temperature()

    simulation.input['temperature'] = temperature
    simulation.compute()

    # output of the fuzzy system for the 'weather_category' consequent
    degree = simulation.output['weather_category']

    # bucket the output into one of three feelings
    if 0 <= degree < 33.33:
        feeling = "It feels cold. "
    elif 33.33 <= degree < 66.66:
        feeling = "It feels warm. "
    else:
        feeling = "It feels hot. "

    return feeling + "The temperature is " + str(temperature)

def predict_category(userInput):
    """Predict the category of a user message.

    Reloads the classifier from "category_classifier.pkl" and the fitted
    vectorizer from "vectorizer.pkl", cleans ``userInput`` (letters only,
    lowercase, single spaces), encodes it and returns the classifier's
    prediction array, which holds one element for the single input text.

    Args:
        userInput: the raw message text (a string).
    """
    with open("category_classifier.pkl", 'rb') as model_file:
        model = pickle.load(model_file)

    with open("vectorizer.pkl", 'rb') as vectorizer_file:
        cv = pickle.load(vectorizer_file)

    # clean the text directly; no DataFrame round-trip is needed for a single string
    letters_only = re.sub('[^a-zA-Z]', ' ', userInput)
    cleaned_data = ' '.join(letters_only.lower().split())

    # encode with the fitted vocabulary and classify
    test_data = cv.transform([cleaned_data]).toarray()
    return model.predict(test_data)

def predict_song_era(userInput):
    """Predict the era of the song year mentioned in ``userInput``.

    A four-digit number is preferred as the year; if the text has none, the
    first run of digits is used. When the text contains no digits at all the
    function returns 'Unknown' instead of raising.

    Args:
        userInput: the raw message text (a string).

    Returns:
        The predicted era label, or 'Unknown' when no year can be extracted.
    """
    # extract the year first so a message without digits never touches the model files
    yearInput = re.findall(r'(?<!\d)\d{4}(?!\d)', userInput) or re.findall(r'\d+', userInput)
    if not yearInput:
        return 'Unknown'

    # load model from pickle file
    with open("song_classifier_model.pkl", 'rb') as model_file:
        model = pickle.load(model_file)

    with open("year_columns.pkl", 'rb') as columns_file:
        year_columns = pickle.load(columns_file)

    # single-row frame holding the year (as a string) to predict
    data_to_predict = pd.DataFrame({"Year": [yearInput[0]]})

    # one-hot encode, then align with the training columns (missing columns become 0)
    data_encoded = pd.get_dummies(data_to_predict).reindex(columns=year_columns, fill_value=0)

    # the notebook fits the model on a plain array, so pass a plain numeric array
    # here as well to avoid scikit-learn's feature-name mismatch warning
    predictions = model.predict(data_encoded.to_numpy(dtype=float))

    return predictions[0]

def get_song():
    """Return a sentence recommending one randomly chosen song.

    Reads 'ChristmasSongs.csv', ignores rows without a 'Description' and
    samples a single row from what remains.
    """
    songs = (
        pd.read_csv('ChristmasSongs.csv')
        .dropna(subset=['Description'])
        .reset_index(drop=True)
    )

    picked = songs.sample(n=1)

    # pull the scalar values out of the one-row frame
    title, artist, year = (picked[column].values[0] for column in ('Title', 'Artist', 'Year'))

    return "You should listen to " + title + " by " + artist + " that is released in the year " + str(year)

def get_greetings():
    """Fetch a random greeting from greetingsapi.com and describe it in a sentence.

    Raises:
        requests.RequestException: if the request times out, cannot be sent,
            or the server answers with an HTTP error status.
    """
    # the timeout keeps the bot from hanging forever on a stalled connection
    http_response = requests.get("https://www.greetingsapi.com/random", timeout=10)
    # surface HTTP errors explicitly instead of failing later on a missing key
    http_response.raise_for_status()

    response = http_response.json()
    message = response['greeting'] + "! " + "That is " + response['type'] + " in " + response['language']

    return message

def get_event():
    """Return a sentence recommending one randomly chosen event from 'ChristmasEvents.csv'."""
    picked = pd.read_csv('ChristmasEvents.csv').sample(n=1)

    # pull the scalar values out of the one-row frame
    title, month, date, location = (
        picked[column].values[0] for column in ('EventTitle', 'Month', 'Date', 'Location')
    )

    return "You should go to " + location + " as there will be " + title + " on " + str(date) + " " + month

def tfidf_vectorize(text_list):
    """Fit a fresh TfidfVectorizer on ``text_list`` and return the transformed matrix."""
    return TfidfVectorizer().fit_transform(text_list)

def get_similarity(user_desc, data_desc):
    """Return the TF-IDF similarity score between ``user_desc`` and ``data_desc``.

    Both texts are vectorized together; the score is the linear kernel between
    the first row (``user_desc``) and the second row (``data_desc``).
    """
    # row 0 is the user text, row 1 the dataset text
    matrix = tfidf_vectorize([user_desc, data_desc])

    # kernel of the user row against both rows; index 1 is the score against data_desc
    scores = linear_kernel(matrix[0:1], matrix)
    return scores.flatten()[1]

def get_history(userInput):
    """Return the two history descriptions most similar to ``userInput``.

    Scores every 'Description' in 'ChristmasHistory.csv' against the input,
    joins the two best-scoring descriptions with a comma and strips
    bracketed numeric markers such as "[12]".
    """
    history_data = pd.read_csv('ChristmasHistory.csv')

    # similarity of the user text against each stored description
    history_data['similarity'] = [
        get_similarity(userInput, description) for description in history_data['Description']
    ]
    top_two = history_data.sort_values(by='similarity', ascending=False).head(2)

    joined = ",".join(top_two['Description'].tolist())
    return re.sub(r'\[\d+\]', '', joined)

In [None]:
# BLOCK 29
# initialize the bot
# Read the token from the DISCORD_BOT_TOKEN environment variable so the real secret
# never has to be written into the notebook; the placeholder stays as a fallback.
token = os.environ.get("DISCORD_BOT_TOKEN", "YOUR_DISCORD_BOT_TOKEN")

# NOTE(review): main() further down builds its own MyClient and does not use this
# client — confirm nothing else relies on it before removing.
client = discord.Client(intents=discord.Intents.all())

In [None]:
# BLOCK 30
class MyClient(discord.Client):
    """Discord client that classifies each incoming message and answers by category."""

    # sent when the category is not recognised, a handler fails or the reply is empty
    FALLBACK_REPLY = "I am sorry. I did not get that. Can you please rephrase?"

    async def on_ready(self):
        """Print a short banner once the bot has logged in."""
        print(f'Logged in as {self.user} (ID: {self.user.id})')
        print('------')
        print('Ready!')

    def _build_reply(self, content):
        """Classify ``content`` and return the reply text for its category."""
        category = predict_category(content)[0]

        if category == "Greetings":
            return get_greetings()
        if category == "Weather":
            return str(predict_weather())
        if category == "SongsEra":
            return "The era of the song is classified as " + predict_song_era(content)
        if category == "Songs":
            return get_song()
        if category == "Events":
            return get_event()
        if category == "History":
            return get_history(content)
        # any category without a handler
        return self.FALLBACK_REPLY

    async def on_message(self, message):
        """Reply to every message that was not sent by the bot itself."""
        # we do not want the bot to reply to itself
        if message.author.id == self.user.id:
            return

        # show the typing status for the whole time the reply is being prepared,
        # including the category prediction
        async with message.channel.typing():
            try:
                reply = self._build_reply(message.content)
            except Exception:
                # tell the user something went wrong, then re-raise so the
                # failure is still reported instead of being swallowed
                await message.channel.send(self.FALLBACK_REPLY)
                raise
            # never try to send an empty message
            await message.channel.send(reply or self.FALLBACK_REPLY)

# default intents plus message content, which on_message reads via message.content
intents = discord.Intents.default()
intents.message_content = True


def main():
    """Create the bot client and run it with the configured token."""
    bot = MyClient(intents=intents)
    bot.run(token)


if __name__ == '__main__':
    main()