In [0]:
# Query the whole `linkedin` table with Spark SQL and convert the result
# straight to a pandas DataFrame, so `data` only ever names the pandas frame.
data = spark.sql('SELECT * from linkedin').toPandas()
# Preview the first rows.
data.head()

Unnamed: 0,Post_ID,Text
0,1,Normalization is a technique often applied as ...
1,2,Training a Machine Learning Model in PyCaret e...
2,3,If an AI algorithm turns the copyrighted work ...
3,4,Do you know you can write nested functions in ...
4,5,Have you still not used PyCaret? Maybe it's ti...


In [0]:
# Print the complete text of the first post (the head() preview above truncates it).
print(data['Text'][0])

In [0]:
# Print the complete text of the second post for comparison.
print(data['Text'][1])

# Text Preprocessing

- Convert text into tokens
- Remove stop words
- Stem / Lemmatize
- Build bag of words

The only difference this time is that, instead of working with a single text, we have 10 texts (in NLP terminology, 10 documents).

In [0]:
# Extract the 'Text' column as a plain Python list, one entry per post.
text = data['Text'].tolist()

In [0]:
# Confirm that `text` is a plain Python list rather than a pandas Series.
type(text)

In [0]:
# Number of documents (posts) held in the list.
len(text)

In [0]:
import nltk
# Fetch NLTK data; the 'all' identifier requests the complete data collection.
# NOTE(review): the cells visible in this notebook only use the stopwords corpus
# and WordNetLemmatizer, so downloading just the packages those need would be
# much lighter — confirm nothing else relies on 'all' before narrowing it.
nltk.download('all')

In [0]:
import re
from nltk.corpus import stopwords

from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# Load the English stop-word list exactly once and keep it as a set.
# Calling stopwords.words('english') inside the per-token filter re-evaluates
# it for every single word and then scans the resulting list linearly; a set
# built up front gives the same filtering with constant-time lookups.
stop_words = set(stopwords.words('english'))

# One cleaned string per input document, in the same order as `text`.
corpus = []

for doc in text:
    # Replace every character that is not an ASCII letter with a space,
    # lower-case the result and split it on whitespace into tokens.
    tokens = re.sub('[^a-zA-Z]', ' ', doc).lower().split()
    # Drop stop words, then lemmatize what remains.
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    # Re-join into a single space-separated string, the input format the
    # vectorizers further down are fitted on.
    corpus.append(' '.join(tokens))

In [0]:
# `corpus` was built with list.append above, so this should report a list.
type(corpus)

In [0]:
# One cleaned string is appended per input document, so this should equal len(text).
len(corpus)

In [0]:
# Original first document, before any cleaning.
print(text[0])

In [0]:
# The same document after cleaning: letters only, lower-cased, stop words removed, lemmatized.
print(corpus[0])

# Bag of Words (CountVectorizer)
The bag-of-words model is a simplifying representation used in natural language processing and information retrieval (IR). In this model, a text (such as a sentence or a document) is represented as the bag (multiset) of its words, disregarding grammar and even word order but keeping multiplicity. The bag-of-words model has also been used for computer vision.

The bag-of-words model is commonly used in methods of document classification where the (frequency of) occurrence of each word is used as a feature for training a classifier.

An early reference to "bag of words" in a linguistic context can be found in Zellig Harris's 1954 article on Distributional Structure. (Wikipedia)

In [0]:
from sklearn.feature_extraction.text import CountVectorizer
# Create the vectorizer with scikit-learn's default settings (no arguments are passed).
cv = CountVectorizer()

In [0]:
# Fit the CountVectorizer on the cleaned corpus and transform it in one step;
# X holds the resulting per-document term counts.
X = cv.fit_transform(corpus)

In [0]:
# Check the type of the object returned by fit_transform
type(X)

In [0]:
# Print X to see how fit_transform represents the counts
print(X)

In [0]:
# Convert X to an array via toarray() so it can be inspected and loaded into pandas below
X_array = X.toarray()

In [0]:
# Check the type of the converted array
type(X_array)

In [0]:
# Shape of X_array: one row per document, one column per vocabulary term
X_array.shape

In [0]:
# Display the raw count matrix.
print(X_array)

In [0]:
# Convert X_array into a pandas DataFrame. No column names are supplied here,
# so the columns get default integer labels until they are renamed further down.
import pandas as pd
df = pd.DataFrame(X_array)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345
0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,2,0,0,0,0,0,2,1,0,0,0,0,0,1
1,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,3,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,2,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
3,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,5,5,0,0,0,1,0,1,0,0,0,0,0,0,5,...,0,0,0,0,2,0,0,0,1,5,7,1,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,...,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,2,0,3,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0
6,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,2,1,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0
8,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,2,0,0,1,2,0,0,0,0,...,0,0,2,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,3,0,0,0,0,0,0,0,0,0,0,1,0
9,0,0,1,1,0,0,0,0,2,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,1,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [0]:
# Assign the vocabulary terms as column names of df.
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2, so prefer get_feature_names_out() and fall back to the old
# method only on releases that do not have the new one.
if hasattr(cv, 'get_feature_names_out'):
    feature_names = cv.get_feature_names_out()
else:
    feature_names = cv.get_feature_names()
df.columns = feature_names
df.head(10)

Unnamed: 0,achieved,ai,algorithm,allows,along,also,alteryx,among,amount,analyze,anomaly,api,application,applied,approach,arima,association,automl,available,average,averaged,away,back,base,basically,best,better,boilerplate,broadly,built,bunch,called,case,catboost,categorized,category,certain,changed,choice,choose,...,text,thats,time,tool,train,trained,training,transformation,tree,true,tune,tuned,turn,tutorial,two,type,understand,unlabeled,unsupervised,us,use,used,using,usually,validated,validation,value,variable,version,want,way,whats,within,without,work,workload,would,write,xgboost,zscore
0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,2,0,0,0,0,0,2,1,0,0,0,0,0,1
1,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,3,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,2,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
3,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,5,5,0,0,0,1,0,1,0,0,0,0,0,0,5,...,0,0,0,0,2,0,0,0,1,5,7,1,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,...,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,2,0,3,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0
6,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,2,1,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0
8,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,2,0,0,1,2,0,0,0,0,...,0,0,2,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,3,0,0,0,0,0,0,0,0,0,0,1,0
9,0,0,1,1,0,0,0,0,2,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,1,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [0]:
# Append the bag-of-words feature columns to the original data.
# axis=1 places the frames side by side, matching rows on their index labels.
new_data = pd.concat([data,df], axis=1)
new_data

Unnamed: 0,Post_ID,Text,achieved,ai,algorithm,allows,along,also,alteryx,among,amount,analyze,anomaly,api,application,applied,approach,arima,association,automl,available,average,averaged,away,back,base,basically,best,better,boilerplate,broadly,built,bunch,called,case,catboost,categorized,category,certain,changed,...,text,thats,time,tool,train,trained,training,transformation,tree,true,tune,tuned,turn,tutorial,two,type,understand,unlabeled,unsupervised,us,use,used,using,usually,validated,validation,value,variable,version,want,way,whats,within,without,work,workload,would,write,xgboost,zscore
0,1,Normalization is a technique often applied as ...,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,2,0,0,0,0,0,2,1,0,0,0,0,0,1
1,2,Training a Machine Learning Model in PyCaret e...,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,3,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,2,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,3,If an AI algorithm turns the copyrighted work ...,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
3,4,Do you know you can write nested functions in ...,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,5,5,0,0,0,1,0,1,0,0,0,0,0,...,0,0,0,0,2,0,0,0,1,5,7,1,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
4,5,Have you still not used PyCaret? Maybe it's ti...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,...,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
5,6,Comparing estimators at the base level perform...,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,1,0,...,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,2,0,3,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0
6,7,"In this tutorial, I will show you how you can ...",0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,2,1,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
7,8,I know both are desirable but If I have to cho...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0
8,9,Time Series Forecasting with PyCaret Regressio...,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,2,0,0,1,2,0,0,...,0,0,2,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1,3,0,0,0,0,0,0,0,0,0,0,1,0
9,10,Semi-supervised learning is an approach to mac...,0,0,1,1,0,0,0,0,2,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,1,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


# TF-IDF
In information retrieval, tf–idf, TF*IDF, or TFIDF, short for term frequency–inverse document frequency, is a numerical statistic that is intended to reflect how important a word is to a document in a collection or corpus. It is often used as a weighting factor in searches of information retrieval, text mining, and user modeling. The tf–idf value increases proportionally to the number of times a word appears in the document and is offset by the number of documents in the corpus that contain the word, which helps to adjust for the fact that some words appear more frequently in general. tf–idf is one of the most popular term-weighting schemes today. A survey conducted in 2015 showed that 83% of text-based recommender systems in digital libraries use tf–idf.

Variations of the tf–idf weighting scheme are often used by search engines as a central tool in scoring and ranking a document's relevance given a user query. tf–idf can be successfully used for stop-words filtering in various subject fields, including text summarization and classification. (Wikipedia)

In [0]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer()

# Fit the TfidfVectorizer on the cleaned corpus and transform it in one step.
# NOTE: X, X_array, df and new_data are deliberately reused here, so from this
# cell on they hold the TF-IDF results instead of the CountVectorizer ones.
X = tfidf.fit_transform(corpus)

# convert X to array
X_array = X.toarray()

# TfidfVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2, so prefer get_feature_names_out() and fall back to the old
# method only on releases that do not have the new one.
if hasattr(tfidf, 'get_feature_names_out'):
    feature_names = tfidf.get_feature_names_out()
else:
    feature_names = tfidf.get_feature_names()

df = pd.DataFrame(X_array)
df.columns = feature_names

# Put the TF-IDF weights next to the original columns, matching rows by index.
new_data = pd.concat([data,df], axis=1)
new_data

Unnamed: 0,Post_ID,Text,achieved,ai,algorithm,allows,along,also,alteryx,among,amount,analyze,anomaly,api,application,applied,approach,arima,association,automl,available,average,averaged,away,back,base,basically,best,better,boilerplate,broadly,built,bunch,called,case,catboost,categorized,category,certain,changed,...,text,thats,time,tool,train,trained,training,transformation,tree,true,tune,tuned,turn,tutorial,two,type,understand,unlabeled,unsupervised,us,use,used,using,usually,validated,validation,value,variable,version,want,way,whats,within,without,work,workload,would,write,xgboost,zscore
0,1,Normalization is a technique often applied as ...,0.134066,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.134066,0.0,0.0,0.0,0.0,0.113969,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.134066,...,0.0,0.0,0.0,0.0,0.0,0.0,0.088649,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.113969,0.0,0.0,0.143941,0.0,0.0,0.0,0.199418,0.0,0.0,0.0,0.0,0.0,0.268133,0.088649,0.0,0.0,0.0,0.0,0.0,0.134066
1,2,Training a Machine Learning Model in PyCaret e...,0.0,0.0,0.0,0.0,0.195938,0.0,0.0,0.0,0.0,0.0,0.097969,0.0,0.0,0.0,0.0,0.0,0.097969,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.072863,0.218588,0.06478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083283,0.083283,0.0,0.0,0.105185,0.0,0.097969,0.083283,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06478,0.0,0.0,0.0,0.0,0.0,0.0
2,3,If an AI algorithm turns the copyrighted work ...,0.0,0.251851,0.187309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.251851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.251851,0.0,0.0,0.251851,0.0,0.0,0.0,0.0,0.0
3,4,Do you know you can write nested functions in ...,0.0,0.0,0.0,0.0,0.0,0.048309,0.0,0.048309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048309,0.0,0.0,0.0,0.0,0.0,0.0,0.048309,0.205337,0.241547,0.0,0.0,0.0,0.048309,0.0,0.041067,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.071858,0.0,0.0,0.0,0.048309,0.241547,0.338165,0.048309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031944,0.0,0.051868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031944,0.0,0.0,0.0,0.048309,0.0,0.0
4,5,Have you still not used PyCaret? Maybe it's ti...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.127158,0.127158,0.0,0.0,0.108096,0.0,0.127158,0.0,0.127158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.189142,0.108096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.108096,0.068262,0.0,0.0,0.0,0.0,0.0,0.127158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,6,Comparing estimators at the base level perform...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056321,0.0,0.0,0.075728,0.0,0.0,0.0,0.0,0.0,0.0,0.064376,0.0,0.075728,0.0,0.0,0.151456,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.075728,0.0,0.0,0.075728,0.0,...,0.0,0.0,0.056321,0.0,0.0,0.0,0.150221,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.075728,0.0,0.0,0.0,0.100147,0.0,0.121959,0.0,0.0,0.064376,0.0,0.0,0.0,0.0,0.075728,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064376,0.0
6,7,"In this tutorial, I will show you how you can ...",0.0,0.0,0.0,0.0,0.0,0.0,0.35985,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.101968,0.178421,0.08921,0.0,0.11995,0.0,0.0,0.0,0.0,0.0,0.203937,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064393,0.0,0.0,0.0,0.08921,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,8,I know both are desirable but If I have to cho...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.104572,0.0,0.0,0.0,0.0,0.0,...,0.0,0.123013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.123013,0.0,0.0,0.0,0.0,0.0,0.08134,0.0,0.0,0.123013,0.0,0.0,0.0,0.0,0.0,0.123013,0.0,0.0,0.0,0.08134,0.0,0.123013,0.0,0.0,0.0,0.0
8,9,Time Series Forecasting with PyCaret Regressio...,0.0,0.0,0.067161,0.0,0.0,0.0,0.0,0.0,0.067161,0.090303,0.0,0.0,0.0,0.0,0.0,0.090303,0.0,0.0,0.0,0.090303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090303,0.0,0.0,0.180606,0.0,0.0,0.090303,0.180606,0.0,0.0,...,0.0,0.0,0.134322,0.0,0.0,0.067161,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076766,0.0,0.0,0.0,0.0,0.0,0.0,0.059711,0.076766,0.0,0.0,0.0,0.0,0.067161,0.270909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076766,0.0
9,10,Semi-supervised learning is an approach to mac...,0.0,0.0,0.056107,0.07544,0.0,0.0,0.0,0.0,0.112213,0.0,0.0,0.0,0.07544,0.0,0.07544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.301758,0.0,0.0,0.0,0.0,0.0,0.199531,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07544,0.0,0.150879,0.064131,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07544,0.0,0.0,0.0
