# Análisis

In [2]:
import warnings
warnings.filterwarnings("ignore",category=FutureWarning)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import datetime as dt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn import metrics
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTEENN
from numpy.linalg import svd
import requests
import json


sscaler = StandardScaler()
rscaler = RobustScaler()
mmscaler = MinMaxScaler()
mascaler = MaxAbsScaler()

le = LabelEncoder()

def lencoder(df, col, lenc=False, train=True):
    if train:
        label = le.fit_transform(df[col])
        df[col] = pd.to_numeric(label)
        return df,le
    elif lenc != False:
        label = lenc.transform(df[col])
        df[col] = pd.to_numeric(label)
        return df
    else:
        print('Nothing done')
        return df



In [3]:
def run_kfold_model (X,y,K=10,graph=True):
    regr = LinearRegression()
    kf = KFold(n_splits=5)
    s=1
    for train_index, test_index in kf.split(X):
        # print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        regr.fit(X_train, y_train)
        print('Slice '+str(s)+' score: '+str(regr.score(X_test, y_test)))
        y_pred = regr.predict(X_test)
        if graph:
            plt.scatter(y_test, y_pred, color = 'red', marker = 'o', s = 35, alpha = 0.5, label = 'Test data')
            plt.show()
        s+=1
    return(regr)

In [4]:
def scores (model, X_train, X_test, y_test, y_pred, graph=True):
    MAE=metrics.mean_absolute_error(y_test, y_pred)
    MSE=metrics.mean_squared_error(y_test, y_pred)
    RMSE=np.sqrt(metrics.mean_squared_error(y_test, y_pred))
    CV_RMSE= (RMSE/np.average(y_test))*100
    R2=metrics.r2_score(y_test, y_pred)
    ACC=metrics.accuracy_score
    REC=metrics.recall_score
    PREC=metrics.precision_score

    print('MAE:'+str(MAE))
    print('MSE:'+str(MSE))
    print('RMSE:'+str(RMSE))
    print('CV(RMSE): '+str(CV_RMSE))
    print('R2:'+str(R2))
    print('Accuracy:'+str(ACC))
    print('Recall:'+str(REC))
    print('Precision:'+str(PREC))

    # print('Training score: '+str(model.score(X_train, y_train))) #(Accuracy or R2)
    # print('Test score: '+str(model.score(X_test, y_train)))
    if graph == True:
        plt.scatter(y_train, y_pred, color = 'red', marker = 'o', s = 35, alpha = 0.5, label = 'Test data')
        plt.show()

In [5]:
import dask.dataframe as dd

In [6]:
from ast import literal_eval
def clean(x):
    return literal_eval(x)

In [7]:
pd.set_option("mode.copy_on_write", True)
pd.options.mode.copy_on_write = True

In [8]:
def dummies(df, col):
    tDummy = pd.get_dummies(df[col]).add_prefix(col+'_')
    df = pd.concat([df,tDummy],axis=1).reindex(df.index)
    return(df)

def just_dummies(df, col):
    tDummy = pd.get_dummies(df[col]).add_prefix(col+'_')
    return(tDummy)

def just_dummies_steps(df, col, ix):
    for i in range(len(df)):
        dfr = df.loc[df[ix]==i]
        if i==0:
            tDummy0 = dummies(dfr,col)
        else:
            tDummy = dummies(dfr,col)
            tDummy0 = pd.concat([tDummy0,tDummy],axis=1)
    return(tDummy)

# Checkpoint 1

In [104]:
# data = pd.read_excel('mozilla-bugs-all.xlsx')
# data.describe

In [105]:
def json_data(URL,params):
    headers = {"Accept" : "application/json"}
    resp = requests.get(URL, params= params, headers= headers)
    df = pd.DataFrame()
    if resp.status_code != 200:
        print('error: ' + str(resp.status_code))
    else:
        print('Success')
        bugs = resp.text
        data = json.loads(bugs)
        df = pd.json_normalize(data['bugs'])
    return(df)

params ={
    "include_fields" : ["id"
                        ,"summary"
                        ,"status"
                        ,"description"
                        ,"type"
                        ,"classification"
                        ,"product"
                        ,"component"
                        ,"priority"
                        ,"assigned_to"
                        ,"resolution"
                        ,"creation_time"
                        ,"last_change_time"
                        ,"severity"
                        ,"version"
                        ]
    ,"product" : "Core"
    # ,"status" : ["VERIFIED","RESOLVED","CLOSED","UNCONFIRMED","NEW"]
    ,"limit" : 10000
    ,"order": "opendate DESC"
}
URL = "https://bugzilla.mozilla.org/rest/bug"


In [106]:
params["status"] = "RESOLVED"
params.pop("status", None)

test_df = json_data(URL,params)
print(test_df.columns.to_list())
test_df.tail()

In [None]:
# for i in test_df.columns.to_list():
#     print("Column "+i+" unique values:")
#     # print(test_df[i].unique())
#     print(test_df[i].value_counts())

#     print()


In [None]:
test_df.groupby(['resolution']).sum().plot(kind='pie', y='id', autopct='%1.0f%%', labeldistance=None)
from datetime import datetime

print(test_df['creation_time'].min())
datetime.now() - pd.to_datetime(pd.to_datetime(test_df['creation_time'][test_df['resolution']=='FIXED']).values.astype(np.int64).mean())
plt.pie(x=test_df['resolution'].unique(), labels=test_df['resolution'].value_counts())
plt.show()

plt.title('Category Distribution')
plt.ylabel('')  # Hide the y-label
plt.show()

test_df.to_csv("dataset_20240907.csv")
test_df.date = pd.to_datetime(test_df.date).values.astype(np.int64)

test_df = pd.DataFrame(pd.to_datetime(test_df.groupby('column').mean().date))

# Checkpoint 2

In [None]:
data = pd.read_csv('dataset_20240827.csv')
data.describe

<bound method NDFrame.describe of       Unnamed: 0 resolution product         creation_time priority  \
0              0        NaN    Core  2024-08-27T07:10:15Z       P5   
1              1        NaN    Core  2024-08-27T07:09:56Z       --   
2              2        NaN    Core  2024-08-27T06:56:08Z       --   
3              3        NaN    Core  2024-08-27T06:13:19Z       P5   
4              4        NaN    Core  2024-08-27T05:29:11Z       --   
...          ...        ...     ...                   ...      ...   
9995        9995        NaN    Core  2024-03-23T18:04:58Z       --   
9996        9996        NaN    Core  2024-03-23T18:01:29Z       --   
9997        9997        NaN    Core  2024-03-23T17:57:21Z       P3   
9998        9998        NaN    Core  2024-03-23T17:56:41Z       P3   
9999        9999        NaN    Core  2024-03-23T17:55:13Z       P3   

     classification    type      last_change_time  \
0        Components  defect  2024-08-27T07:10:15Z   
1        Components

In [None]:
#import nltk
#nltk.download('punkt_tab')
#nltk.download('stopwords')
#nltk.download('punkt')
#nltk.download('wordnet')

In [None]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.tokenize import MWETokenizer
from nltk.stem import WordNetLemmatizer
import math
import re

def NLProcess (text):

    # print('Original text: ',data.iloc[1]['description'])
    #print("====================== starting ======================")
    ######################################################################### Tokenised
    mwe_tokenizer = MWETokenizer([('does', 'not'), ('in', 'spite', 'of'),('don', '’', 't')])
    tokenizer = word_tokenize
    tokenised = tokenizer(text)
    #print('Tokenised: ',tokenised)
    retokenised = mwe_tokenizer.tokenize(tokenised)
    # retokenised = [token.replace('_', '') for token in retokenised]
    #print('ReTokenised: ',retokenised)

    ######################################################################### Lower-case converted
    normalised = [word.lower() for word in retokenised]
    #print('Normalised: ',normalised)

    ######################################################################### Punctuation marks removed
    unmarked = [re.sub(r'[^\w\s]', '', token) for token in normalised if re.sub(r'[^\w\s]', '', token)]
    #print('Punctuation marks removed: ',unmarked)

    ######################################################################### Stop-words removed
    stop_words = set(stopwords.words('english'))
    filtered = [word for word in unmarked if word not in stop_words]
    #print('Stop-words removed: ',filtered)

    ######################################################################### Lemmatised
    lemmatizer = WordNetLemmatizer()
    lemmatised = [lemmatizer.lemmatize(word) for word in filtered]
    #print('Lemmatised: ',lemmatised)

    return lemmatised


In [None]:
# def dfNLProcess (df,column):
#     for index, row in df.iterrows():
#         text = row[column]
#         # print('Original text: ',data.iloc[1]['description'])
#         #print("====================== starting ======================")
#         ######################################################################### Tokenised
#         mwe_tokenizer = MWETokenizer([('does', 'not'), ('in', 'spite', 'of'),('don', '’', 't')])
#         tokenizer = word_tokenize
#         tokenised = tokenizer(text)
#         #print('Tokenised: ',tokenised)
#         retokenised = mwe_tokenizer.tokenize(tokenised)
#         # retokenised = [token.replace('_', '') for token in retokenised]
#         #print('ReTokenised: ',retokenised)

#         ######################################################################### Lower-case converted
#         normalised = [word.lower() for word in retokenised]
#         #print('Normalised: ',normalised)

#         ######################################################################### Punctuation marks removed
#         unmarked = [re.sub(r'[^\w\s]', '', token) for token in normalised if re.sub(r'[^\w\s]', '', token)]
#         #print('Punctuation marks removed: ',unmarked)

#         ######################################################################### Stop-words removed
#         stop_words = set(stopwords.words('english'))
#         filtered = [word for word in unmarked if word not in stop_words]
#         #print('Stop-words removed: ',filtered)

#         ######################################################################### Lemmatised
#         lemmatizer = WordNetLemmatizer()
#         lemmatised = [lemmatizer.lemmatize(word) for word in filtered]
#         #print('Lemmatised: ',lemmatised)

#         lemmatised
#         ncol = 'P'+col
#         df2 = pd.DataFrame({ncol:lemmatised})
#         df2 = dummies(df2)


In [None]:
# dfNLProcess(data,'summary')

In [None]:
# for i in range(10):
#     NLProcess(data.iloc[i]['description'])

data['Psummary'] = data['summary'].map(NLProcess)

# type(data['summary'])

In [None]:
data['Pdescription'] = data['description'].fillna('').map(NLProcess)

In [None]:
#data.loc[data['description'].str.contains('//crash-stats.mozilla.org/report/index/768c5c44-57c5-4746-890c-9af820240811', case=False, na=False)]

In [None]:
data.head()

Unnamed: 0.1,Unnamed: 0,resolution,product,creation_time,priority,classification,type,last_change_time,component,summary,...,id,severity,version,assigned_to_detail.nick,assigned_to_detail.id,assigned_to_detail.name,assigned_to_detail.real_name,assigned_to_detail.email,Psummary,Pdescription
0,0,,Core,2024-08-27T07:10:15Z,P5,Components,defect,2024-08-27T07:10:15Z,Widget,Intermittent widget/tests/browser/browser_test...,...,1915086,S4,unspecified,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[intermittent, widgettestsbrowserbrowser_test_...","[filed, nfay, mozillacom, parsed, log, http, t..."
1,1,,Core,2024-08-27T07:09:56Z,--,Components,defect,2024-08-27T07:12:28Z,Networking: DNS,"Bad name, functionality for network.trr.exclud...",...,1915085,--,Firefox 129,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[bad, name, functionality, networktrrexcludeet...","[user, agent, mozilla50, macintosh, intel, mac..."
2,2,,Core,2024-08-27T06:56:08Z,--,Components,task,2024-08-27T07:02:38Z,WebRTC: Audio/Video,Request sRGB colorspace from ScreenCapturerSck,...,1915082,,unspecified,pehrsons,489889,apehrson@mozilla.com,Andreas Pehrson [:pehrsons],apehrson@mozilla.com,"[request, srgb, colorspace, screencapturersck]","[screencapturekit, doc, http, developerappleco..."
3,3,,Core,2024-08-27T06:13:19Z,P5,Components,defect,2024-08-27T06:17:48Z,Panning and Zooming,Intermittent gfx/layers/apz/test/mochitest/tes...,...,1915078,S4,unspecified,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[intermittent, gfxlayersapztestmochitesttest_g...","[filed, nfay, mozillacom, parsed, log, http, t..."
4,4,,Core,2024-08-27T05:29:11Z,--,Components,defect,2024-08-27T06:25:14Z,DOM: Core & HTML,Removing meta viewport tag has no effect,...,1915077,--,unspecified,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[removing, meta, viewport, tag, effect]","[str, 1, open, site, meta, viewport, element, ..."


In [None]:
data.to_csv("dataset_20240907_plusP.csv")

# Checkpoint 3

In [None]:
data = pd.read_csv('dataset_20240907_plusP.csv', converters={'Psummary': clean})
data.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,resolution,product,creation_time,priority,classification,type,last_change_time,component,...,id,severity,version,assigned_to_detail.nick,assigned_to_detail.id,assigned_to_detail.name,assigned_to_detail.real_name,assigned_to_detail.email,Psummary,Pdescription
0,0,0,,Core,2024-08-27T07:10:15Z,P5,Components,defect,2024-08-27T07:10:15Z,Widget,...,1915086,S4,unspecified,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[intermittent, widgettestsbrowserbrowser_test_...","['filed', 'nfay', 'mozillacom', 'parsed', 'log..."
1,1,1,,Core,2024-08-27T07:09:56Z,--,Components,defect,2024-08-27T07:12:28Z,Networking: DNS,...,1915085,--,Firefox 129,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[bad, name, functionality, networktrrexcludeet...","['user', 'agent', 'mozilla50', 'macintosh', 'i..."
2,2,2,,Core,2024-08-27T06:56:08Z,--,Components,task,2024-08-27T07:02:38Z,WebRTC: Audio/Video,...,1915082,,unspecified,pehrsons,489889,apehrson@mozilla.com,Andreas Pehrson [:pehrsons],apehrson@mozilla.com,"[request, srgb, colorspace, screencapturersck]","['screencapturekit', 'doc', 'http', 'developer..."
3,3,3,,Core,2024-08-27T06:13:19Z,P5,Components,defect,2024-08-27T06:17:48Z,Panning and Zooming,...,1915078,S4,unspecified,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[intermittent, gfxlayersapztestmochitesttest_g...","['filed', 'nfay', 'mozillacom', 'parsed', 'log..."
4,4,4,,Core,2024-08-27T05:29:11Z,--,Components,defect,2024-08-27T06:25:14Z,DOM: Core & HTML,...,1915077,--,unspecified,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[removing, meta, viewport, tag, effect]","['str', '1', 'open', 'site', 'meta', 'viewport..."


In [None]:
data.drop(axis=1, columns=['Unnamed: 0.1','Unnamed: 0'], inplace=True)
data.head()

Unnamed: 0,resolution,product,creation_time,priority,classification,type,last_change_time,component,summary,description,...,id,severity,version,assigned_to_detail.nick,assigned_to_detail.id,assigned_to_detail.name,assigned_to_detail.real_name,assigned_to_detail.email,Psummary,Pdescription
0,,Core,2024-08-27T07:10:15Z,P5,Components,defect,2024-08-27T07:10:15Z,Widget,Intermittent widget/tests/browser/browser_test...,**Filed by:** nfay [at] mozilla.com\r\n**Parse...,...,1915086,S4,unspecified,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[intermittent, widgettestsbrowserbrowser_test_...","['filed', 'nfay', 'mozillacom', 'parsed', 'log..."
1,,Core,2024-08-27T07:09:56Z,--,Components,defect,2024-08-27T07:12:28Z,Networking: DNS,"Bad name, functionality for network.trr.exclud...",User Agent: Mozilla/5.0 (Macintosh; Intel Mac ...,...,1915085,--,Firefox 129,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[bad, name, functionality, networktrrexcludeet...","['user', 'agent', 'mozilla50', 'macintosh', 'i..."
2,,Core,2024-08-27T06:56:08Z,--,Components,task,2024-08-27T07:02:38Z,WebRTC: Audio/Video,Request sRGB colorspace from ScreenCapturerSck,[The ScreenCaptureKit docs](https://developer....,...,1915082,,unspecified,pehrsons,489889,apehrson@mozilla.com,Andreas Pehrson [:pehrsons],apehrson@mozilla.com,"[request, srgb, colorspace, screencapturersck]","['screencapturekit', 'doc', 'http', 'developer..."
3,,Core,2024-08-27T06:13:19Z,P5,Components,defect,2024-08-27T06:17:48Z,Panning and Zooming,Intermittent gfx/layers/apz/test/mochitest/tes...,**Filed by:** nfay [at] mozilla.com\r\n**Parse...,...,1915078,S4,unspecified,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[intermittent, gfxlayersapztestmochitesttest_g...","['filed', 'nfay', 'mozillacom', 'parsed', 'log..."
4,,Core,2024-08-27T05:29:11Z,--,Components,defect,2024-08-27T06:25:14Z,DOM: Core & HTML,Removing meta viewport tag has no effect,STR;\r\n\r\n1. Open any sites having a meta vi...,...,1915077,--,unspecified,nobody,1,nobody@mozilla.org,Nobody; OK to take it and work on it,nobody@mozilla.org,"[removing, meta, viewport, tag, effect]","['str', '1', 'open', 'site', 'meta', 'viewport..."


In [None]:
# from sklearn.feature_selection import mutual_info_classif

In [None]:
type(data.iloc[0]['Psummary'])
# type(data['Psummary'])

list

In [None]:
exploded = data.explode('Psummary')

In [None]:
filter='^\d+$'
notfilter='^(?!'+filter+').*$'
exploded_filtered = exploded.loc[exploded['Psummary'].str.contains(notfilter)]

In [None]:
checkit = just_dummies(exploded_filtered,'Psummary')

In [None]:
checkit.index.name = 'index'

In [None]:
checkit = dd.from_pandas(checkit, chunksize=100)

In [None]:
checkit.head()

Unnamed: 0_level_0,Psummary_00f,Psummary_01f,Psummary_04347e67c6f87ee7a33c8ed8103aecebac6c3888,Psummary_0a1,Psummary_0async,Psummary_0cad754da2ee3ececcfa1aad8b858a0286c24e16,Psummary_0dc559f060db0d62d95f424e3fd26a5f673b2f6b,Psummary_0e30966b198ad28943799eaf5b3b08100b6f70c3,Psummary_0kb,Psummary_0px,...,Psummary_zoomteams,Psummary_zoomus,Psummary_zstd,Psummary_zstd_dctx_setparameter,Psummary_zwp_tablet_tool_v2_set_cursor,Psummary_zwsetevent,Psummary_zwusermsgwaitformultipleobjectsex,Psummary_zydis,Psummary_ñ,Psummary_㜱㜸㤱㠰㤴㜶㔰㔰
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# prefix='Psummary_'
# filter=prefix+'\d+$'
# notfilter='^(?!'+filter+').*$'
# print(len(checkit.filter(regex=(notfilter)).columns.tolist()))
# # print(checkit.filter(regex=(notfilter)).columns.tolist())

In [None]:
checkot = checkit.groupby(['index'], observed=True).sum()

In [None]:
checkot['nindex'] = checkot.index

In [None]:
checkot.head()

Unnamed: 0_level_0,Psummary_00f,Psummary_01f,Psummary_04347e67c6f87ee7a33c8ed8103aecebac6c3888,Psummary_0a1,Psummary_0async,Psummary_0cad754da2ee3ececcfa1aad8b858a0286c24e16,Psummary_0dc559f060db0d62d95f424e3fd26a5f673b2f6b,Psummary_0e30966b198ad28943799eaf5b3b08100b6f70c3,Psummary_0kb,Psummary_0px,...,Psummary_zoomus,Psummary_zstd,Psummary_zstd_dctx_setparameter,Psummary_zwp_tablet_tool_v2_set_cursor,Psummary_zwsetevent,Psummary_zwusermsgwaitformultipleobjectsex,Psummary_zydis,Psummary_ñ,Psummary_㜱㜸㤱㠰㤴㜶㔰㔰,nindex
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4


In [None]:
checket = checkot.compute()

In [None]:
checkit = checket.reset_index()

In [None]:
checkit.head()

Unnamed: 0,index,Psummary_00f,Psummary_01f,Psummary_04347e67c6f87ee7a33c8ed8103aecebac6c3888,Psummary_0a1,Psummary_0async,Psummary_0cad754da2ee3ececcfa1aad8b858a0286c24e16,Psummary_0dc559f060db0d62d95f424e3fd26a5f673b2f6b,Psummary_0e30966b198ad28943799eaf5b3b08100b6f70c3,Psummary_0kb,...,Psummary_zoomus,Psummary_zstd,Psummary_zstd_dctx_setparameter,Psummary_zwp_tablet_tool_v2_set_cursor,Psummary_zwsetevent,Psummary_zwusermsgwaitformultipleobjectsex,Psummary_zydis,Psummary_ñ,Psummary_㜱㜸㤱㠰㤴㜶㔰㔰,nindex
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4


In [None]:
# def rowgroupbyidnex(df):
#     rows = []
#     for i in len(df):
#         rows.append(df.iloc[i].groupby(checkit.index, observed=True).sum())





In [None]:
checkit.to_csv('dataset_20240907_plusP_Psummary.csv')

# Checkpoint 4

In [10]:
data = pd.read_csv('dataset_20240907_plusP.csv', converters={'Pdescription': clean,'Psummary': clean})

In [11]:
summary = pd.read_csv('dataset_20240907_plusP_Psummary.csv')

In [12]:
exploded = data.explode('Pdescription')
filter='^\d+$'
notfilter='^(?!'+filter+').*$'

exploded = exploded.loc[exploded['Pdescription'].notna()]

exploded_filtered = exploded.loc[exploded['Pdescription'].str.contains(notfilter)]

In [None]:
exploded_filtered['nindex'] = exploded_filtered.index

In [None]:
exploded_filtered = exploded_filtered.reset_index()
checkit = just_dummies_steps(exploded_filtered[['nindex','Pdescription']],'Pdescription','nindex')

In [None]:
checkit = dd.from_pandas(checkit, chunksize=100)

In [None]:
checkot = checkit.groupby([checkit.index], observed=True).sum()
checkot['nindex'] = checkot.index
checket = checkot.compute()
checkit = checket.reset_index()
checkit.to_csv('dataset_20240907_plusP_Pdescription.csv')

In [13]:
summary.drop(axis=1, columns=['Unnamed: 0','nindex','index'], inplace=True)
summary.head()

Unnamed: 0,Psummary_00f,Psummary_01f,Psummary_04347e67c6f87ee7a33c8ed8103aecebac6c3888,Psummary_0a1,Psummary_0async,Psummary_0cad754da2ee3ececcfa1aad8b858a0286c24e16,Psummary_0dc559f060db0d62d95f424e3fd26a5f673b2f6b,Psummary_0e30966b198ad28943799eaf5b3b08100b6f70c3,Psummary_0kb,Psummary_0px,...,Psummary_zoomteams,Psummary_zoomus,Psummary_zstd,Psummary_zstd_dctx_setparameter,Psummary_zwp_tablet_tool_v2_set_cursor,Psummary_zwsetevent,Psummary_zwusermsgwaitformultipleobjectsex,Psummary_zydis,Psummary_ñ,Psummary_㜱㜸㤱㠰㤴㜶㔰㔰
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
data['Psummary']=data['Psummary'].tolist()

In [14]:
data['Pdescription']=data['Pdescription'].apply(lambda x: ' '.join(x))
data['Psummary']=data['Psummary'].apply(lambda x: ' '.join(x))

In [None]:
summary.head()

Unnamed: 0,Psummary_00f,Psummary_01f,Psummary_04347e67c6f87ee7a33c8ed8103aecebac6c3888,Psummary_0a1,Psummary_0async,Psummary_0cad754da2ee3ececcfa1aad8b858a0286c24e16,Psummary_0dc559f060db0d62d95f424e3fd26a5f673b2f6b,Psummary_0e30966b198ad28943799eaf5b3b08100b6f70c3,Psummary_0kb,Psummary_0px,...,Psummary_zoomteams,Psummary_zoomus,Psummary_zstd,Psummary_zstd_dctx_setparameter,Psummary_zwp_tablet_tool_v2_set_cursor,Psummary_zwsetevent,Psummary_zwusermsgwaitformultipleobjectsex,Psummary_zydis,Psummary_ñ,Psummary_㜱㜸㤱㠰㤴㜶㔰㔰
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

def tfidf(df,col):
  vectorizer = TfidfVectorizer()
  vectors = vectorizer.fit_transform(df[col])

  # Select the first five documents from the data set
  tf_idf = pd.DataFrame(vectors.todense())#.iloc[:5]
  tf_idf.columns = vectorizer.get_feature_names_out()
  tfidf_matrix = tf_idf.T
  tfidf_matrix.columns = ['bug'+ str(i) for i in range(1, 10001)]
  tfidf_matrix['count'] = tfidf_matrix.sum(axis=1)

  # Top words
  tfidf_matrix = tfidf_matrix.sort_values(by ='count', ascending=False)

  # Print the first 10 words
  # print(tfidf_matrix.drop(columns=['count']).head(10))

  return tfidf_matrix


In [26]:
# tfidf_Pdescription = tfidf(data,'Pdescription')
tfidf_Psummary = tfidf(data,'Psummary')
over1 = tfidf_Psummary.loc[tfidf_Psummary['count']>1]

In [28]:
filter='^\d+$'
notfilter='^(?!'+filter+').*$'
wordlst = over1.loc[over1.index.str.contains(notfilter)]

In [29]:
wordlst['words'] = 'Psummary_' + wordlst.index.astype(str)
wordlst.head()

Unnamed: 0,bug1,bug2,bug3,bug4,bug5,bug6,bug7,bug8,bug9,bug10,...,bug9993,bug9994,bug9995,bug9996,bug9997,bug9998,bug9999,bug10000,count,words
intermittent,0.239473,0.0,0.0,0.185069,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,379.11454,Psummary_intermittent
bug,0.260809,0.0,0.0,0.201557,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,354.356753,Psummary_bug
tracking,0.262563,0.0,0.0,0.202912,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,352.168633,Psummary_tracking
single,0.264538,0.0,0.0,0.204439,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,349.34071,Psummary_single
sync,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,213.736062,Psummary_sync


In [30]:
summary.head()

Unnamed: 0,Psummary_00f,Psummary_01f,Psummary_04347e67c6f87ee7a33c8ed8103aecebac6c3888,Psummary_0a1,Psummary_0async,Psummary_0cad754da2ee3ececcfa1aad8b858a0286c24e16,Psummary_0dc559f060db0d62d95f424e3fd26a5f673b2f6b,Psummary_0e30966b198ad28943799eaf5b3b08100b6f70c3,Psummary_0kb,Psummary_0px,...,Psummary_zoomteams,Psummary_zoomus,Psummary_zstd,Psummary_zstd_dctx_setparameter,Psummary_zwp_tablet_tool_v2_set_cursor,Psummary_zwsetevent,Psummary_zwusermsgwaitformultipleobjectsex,Psummary_zydis,Psummary_ñ,Psummary_㜱㜸㤱㠰㤴㜶㔰㔰
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
summary = summary[wordlst['words']]
summary.head()

Unnamed: 0,Psummary_intermittent,Psummary_bug,Psummary_tracking,Psummary_single,Psummary_sync,Psummary_pr,Psummary_wptsync,Psummary_test,Psummary_wpt,Psummary_tier,...,Psummary_loopffsh,Psummary_unified,Psummary_loong64,Psummary_imagetestreftesticoicobmp8bppicosize1x18bppico,Psummary_downloads,Psummary_mitigate,Psummary_testsjittestjittesttestsgcbug1517158js,Psummary_fuse,Psummary_lighter,Psummary_eventsource
0,1,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
summary.tail()

Unnamed: 0,Psummary_intermittent,Psummary_bug,Psummary_tracking,Psummary_single,Psummary_sync,Psummary_pr,Psummary_wptsync,Psummary_test,Psummary_wpt,Psummary_tier,...,Psummary_loopffsh,Psummary_unified,Psummary_loong64,Psummary_imagetestreftesticoicobmp8bppicosize1x18bppico,Psummary_downloads,Psummary_mitigate,Psummary_testsjittestjittesttestsgcbug1517158js,Psummary_fuse,Psummary_lighter,Psummary_eventsource
9994,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
9995,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
9996,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
9997,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
9998,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
mergeddata = pd.concat([data,summary],axis=1)

mergeddata.to_csv('dataset_20240907_plusP_Psummary_tfidf.csv')

# Checkpoint 5

In [9]:
mergeddata = pd.read_csv('dataset_20240907_plusP_Psummary_tfidf.csv')

  mergeddata = pd.read_csv('dataset_20240907_plusP_Psummary_tfidf.csv')


In [10]:
mergeddata.head()
mergeddata['resolution'] = mergeddata['resolution'].fillna('')
mergeddata['severity'] = mergeddata['severity'].fillna('SN/A')
# mergeddata['resolution'] = mergeddata['resolution'].astype(str)

In [11]:
mergeddata.drop([
'Unnamed: 0.2',
'Unnamed: 0.1',
'Unnamed: 0',
'Unnamed: 0.2',
'Unnamed: 0.1',
'Unnamed: 0',
'summary',
'description',
'Psummary',
'Pdescription',
'assigned_to_detail.real_name'
],axis=1,inplace=True)

mergeddata.head()

Unnamed: 0,resolution,product,creation_time,priority,classification,type,last_change_time,component,status,assigned_to,...,Psummary_loopffsh,Psummary_unified,Psummary_loong64,Psummary_imagetestreftesticoicobmp8bppicosize1x18bppico,Psummary_downloads,Psummary_mitigate,Psummary_testsjittestjittesttestsgcbug1517158js,Psummary_fuse,Psummary_lighter,Psummary_eventsource
0,,Core,2024-08-27T07:10:15Z,P5,Components,defect,2024-08-27T07:10:15Z,Widget,NEW,nobody@mozilla.org,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,,Core,2024-08-27T07:09:56Z,--,Components,defect,2024-08-27T07:12:28Z,Networking: DNS,UNCONFIRMED,nobody@mozilla.org,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,,Core,2024-08-27T06:56:08Z,--,Components,task,2024-08-27T07:02:38Z,WebRTC: Audio/Video,ASSIGNED,apehrson@mozilla.com,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,,Core,2024-08-27T06:13:19Z,P5,Components,defect,2024-08-27T06:17:48Z,Panning and Zooming,NEW,nobody@mozilla.org,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,,Core,2024-08-27T05:29:11Z,--,Components,defect,2024-08-27T06:25:14Z,DOM: Core & HTML,NEW,nobody@mozilla.org,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
mergeddata = mergeddata[mergeddata['priority']!= '--']
# mergeddata.loc[mergeddata['priority']=='--'].head()


mergeddata = mergeddata[mergeddata['severity']!= '--']
mergeddata['severity'].replace('normal','S3')
# mergeddata.loc[mergeddata['severity']=='--'].head()

len(mergeddata)

4110

In [13]:
mergeddata['severity'].unique()

array(['S4', 'SN/A', 'S3', 'S2', 'S1'], dtype=object)

In [14]:
tipos = mergeddata.dtypes
# tipos['columna']=tipos.index
# tipos.rename({'0':'tipo'})
tipos = tipos.to_frame()

# columnas = tipos.loc[tipos['tipo']=='object'].index
columnas = tipos.loc[tipos[0]!='float'].index.to_list()
columnas

for columna in columnas:
    print(columna)
    print(mergeddata[columna].apply(type).unique())
    print('--------------------------------------------')

resolution
[<class 'str'>]
--------------------------------------------
product
[<class 'str'>]
--------------------------------------------
creation_time
[<class 'str'>]
--------------------------------------------
priority
[<class 'str'>]
--------------------------------------------
classification
[<class 'str'>]
--------------------------------------------
type
[<class 'str'>]
--------------------------------------------
last_change_time
[<class 'str'>]
--------------------------------------------
component
[<class 'str'>]
--------------------------------------------
status
[<class 'str'>]
--------------------------------------------
assigned_to
[<class 'str'>]
--------------------------------------------
id
[<class 'int'>]
--------------------------------------------
severity
[<class 'str'>]
--------------------------------------------
version
[<class 'str'>]
--------------------------------------------
assigned_to_detail.nick
[<class 'str'>]
---------------------------------------

# XGBoost

In [41]:
mergeddata.head()
# mergeddata = dummies(mergeddata,'resolution')
# mergeddata = dummies(mergeddata,'product')
# mergeddata = dummies(mergeddata,'priority')
# mergeddata = dummies(mergeddata,'classification')
# mergeddata = dummies(mergeddata,'type')
# mergeddata = dummies(mergeddata,'component')
# mergeddata = dummies(mergeddata,'assigned_to')
mergeddata, severity_le= lencoder(mergeddata,'severity')
# mergeddata = dummies(mergeddata,'version')
# mergeddata = dummies(mergeddata,'assigned_to_detail.nick')
# mergeddata = dummies(mergeddata,'assigned_to_detail.id')
# mergeddata = dummies(mergeddata,'assigned_to_detail.name')
# mergeddata = dummies(mergeddata,'assigned_to_detail.email')


In [43]:
from numpy import loadtxt
import xgboost as xgb
from xgboost import DMatrix, XGBClassifier

dataset = mergeddata

X = dataset.drop(['severity','priority'],axis=1)
Y = dataset['severity']

# split data into train and test sets
seed = 7
test_size = 0.3
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)

dtrain = DMatrix(X_train, label=y_train, enable_categorical=True)

params = {
    'objective': 'binary:logistic',
    'tree_method': 'hist'  # 'hist' or 'approx' are recommended for categorical data
}

# fit model on training data
# model = XGBClassifier()
# model.fit(X_train, y_train)
model = xgb.train(params, dtrain, num_boost_round=10)

# make predictions for test data
dpredict = xgb.DMatrix(X_test, enable_categorical=True)

# y_pred = model.predict(X_test)
y_pred = model.predict(dpredict)


# predictions = [round(value) for value in y_pred]

scores(model, X_train, X_test, y_test, y_pred)

ValueError: DataFrame.dtypes for data must be int, float, bool or category. When categorical type is supplied, the experimental DMatrix parameter`enable_categorical` must be set to `True`.  Invalid columns:resolution: object, product: object, creation_time: object, classification: object, type: object, last_change_time: object, component: object, status: object, assigned_to: object, version: object, assigned_to_detail.nick: object, assigned_to_detail.name: object, assigned_to_detail.email: object

In [None]:
from numpy import loadtxt
import xgboost as xgb
from xgboost import DMatrix, XGBClassifier

dataset = mergeddata

X = dataset.drop(['severity','priority'],axis=1)
Y = dataset['severity']

# split data into train and test sets
seed = 7
test_size = 0.3
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)

dtrain = DMatrix(X_train, label=y_train, enable_categorical=True)

params = {
    'objective': 'binary:logistic',
    'tree_method': 'hist'  # 'hist' or 'approx' are recommended for categorical data
}

# fit model on training data
# model = XGBClassifier()
# model.fit(X_train, y_train)
model = xgb.train(params, dtrain, num_boost_round=10)

# make predictions for test data
dpredict = xgb.DMatrix(X_test, enable_categorical=True)

# y_pred = model.predict(X_test)
y_pred = model.predict(dpredict)


# predictions = [round(value) for value in y_pred]

scores(model, X_train, X_test, y_test, y_pred)

ValueError: DataFrame.dtypes for data must be int, float, bool or category. When categorical type is supplied, the experimental DMatrix parameter`enable_categorical` must be set to `True`.  Invalid columns:resolution: object, product: object, creation_time: object, classification: object, type: object, last_change_time: object, component: object, status: object, assigned_to: object, version: object, assigned_to_detail.nick: object, assigned_to_detail.name: object, assigned_to_detail.email: object

# Linear Regression

In [73]:
# mergeddata['classification'].unique()

In [15]:
mergeddata = dummies(mergeddata,'resolution')
mergeddata = dummies(mergeddata,'product')
mergeddata, lemodel = lencoder(mergeddata,'priority')
mergeddata = dummies(mergeddata,'classification')
mergeddata = dummies(mergeddata,'type')
mergeddata = dummies(mergeddata,'component')
mergeddata = dummies(mergeddata,'assigned_to')
mergeddata, lemodel = lencoder(mergeddata,'severity')
mergeddata, lemodel = lencoder(mergeddata,'version')
mergeddata = dummies(mergeddata,'assigned_to_detail.nick')
mergeddata = dummies(mergeddata,'assigned_to_detail.id')
mergeddata = dummies(mergeddata,'assigned_to_detail.name')
mergeddata = dummies(mergeddata,'assigned_to_detail.email')
mergeddata['creation_time'] = pd.to_datetime(mergeddata['creation_time'])
mergeddata['last_change_time'] = pd.to_datetime(mergeddata['last_change_time'])

In [16]:
def columns_with_nan(df):
    # Obtener una lista de los nombres de las columnas
    columns = df.columns.to_list()
    
    # Filtrar las columnas que tienen valores NaN
    columns_with_nan = [col for col in columns if df[col].isna().any()]
    
    return columns_with_nan

In [17]:
columns_with_nan(mergeddata)

['Psummary_intermittent',
 'Psummary_bug',
 'Psummary_tracking',
 'Psummary_single',
 'Psummary_sync',
 'Psummary_pr',
 'Psummary_wptsync',
 'Psummary_test',
 'Psummary_wpt',
 'Psummary_tier',
 'Psummary_failure',
 'Psummary_add',
 'Psummary_crash',
 'Psummary_firefox',
 'Psummary_code',
 'Psummary_new',
 'Psummary_remove',
 'Psummary_fix',
 'Psummary_update',
 'Psummary_failing',
 'Psummary_args',
 'Psummary_assertion',
 'Psummary_nt',
 'Psummary_http',
 'Psummary_implement',
 'Psummary_support',
 'Psummary_unknown',
 'Psummary_window',
 'Psummary_mozilla',
 'Psummary_use',
 'Psummary_video',
 'Psummary_page',
 'Psummary_error',
 'Psummary_set',
 'Psummary_meta',
 'Psummary_text',
 'Psummary_perma',
 'Psummary_using',
 'Psummary_element',
 'Psummary_file',
 'Psummary_work',
 'Psummary_does_not',
 'Psummary_event',
 'Psummary_etp',
 'Psummary_export',
 'Psummary_strict',
 'Psummary_webkit',
 'Psummary_enable',
 'Psummary_make',
 'Psummary_dom',
 'Psummary_bugswebkitorgshow_bugcgi',
 'P

In [18]:
def rows_with_nan(df, column_name):
    # Filtrar las filas donde la columna especificada tiene valores NaN
    rows_with_nan = df[df[column_name].isna()]
    return rows_with_nan
rows_with_nan(mergeddata,'Psummary_intermittent')
mergeddata.drop(index=9999, inplace=True)

In [20]:
linear = LinearRegression()

dataset = mergeddata

X = dataset.drop(['severity','priority'
,'resolution'
,'product'
,'creation_time'
,'classification'
,'type'
,'last_change_time'
,'component'
,'status'
,'assigned_to'
,'assigned_to_detail.nick'
,'assigned_to_detail.name'
,'assigned_to_detail.email'
                ],axis=1)
Y = dataset['severity']

seed = 7
test_size = 0.3
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)

linear.fit(X_train, y_train)
print('Score: '+str(linear.score(X_train, y_train)))
print('Score: '+str(linear.score(X_test, y_test)))
# y_pred(linear.predict())
scores(linear, X_train, X_test, y_train, y_test)

Score: 0.973729517385547
Score: -15.477014097385087


ValueError: Found input variables with inconsistent numbers of samples: [2876, 1233]

In [21]:
from pycaret.classification import *

clf1 = dataset.drop(['severity','priority'
,'resolution'
,'product'
,'creation_time'
,'classification'
,'type'
,'last_change_time'
,'component'
,'status'
,'assigned_to'
,'assigned_to_detail.nick'
,'assigned_to_detail.name'
,'assigned_to_detail.email'
                ],axis=1)

clf1 = setup(data, target='severity')

# Compare models
best_model = compare_models()

RuntimeError: ('Pycaret only supports python 3.9, 3.10, 3.11. Your actual Python version: ', sys.version_info(major=3, minor=12, micro=3, releaselevel='final', serial=0), 'Please DOWNGRADE your Python version.')