### Preparing A Dataset for Multi-Label Classification

#### Data
+ stackoverflow dataset

In [6]:
# Load EDA Pkgs
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [7]:
# Load Dataset
df = pd.read_csv("data/stackoverflow_dataset1.csv")

In [8]:
df.head()

Unnamed: 0,title,tags
0,Flask-SQLAlchemy - When are the tables/databas...,"['python', 'mysql']"
1,Combining two PHP variables for MySQL query,"['php', 'mysql']"
2,'Counting' the number of records that match a ...,"['php', 'mysql']"
3,Insert new row in a table and auto id number. ...,"['php', 'mysql']"
4,Create Multiple MySQL tables using PHP,"['php', 'mysql']"


In [10]:
# Check for datatype
df.dtypes

title    object
tags     object
dtype: object

In [11]:
# Shape
df.shape

(144, 2)

In [12]:
# Convert To Multilabel using binarization (0,1)
#### Steps
+ convert to set
+ multilabel-binarizer (0,1)

In [14]:
# Convert to a set
import ast

In [17]:
df['new_tags'] = df['tags'].apply(ast.literal_eval).apply(set)

In [18]:
df.head()

Unnamed: 0,title,tags,new_tags
0,Flask-SQLAlchemy - When are the tables/databas...,"['python', 'mysql']","{python, mysql}"
1,Combining two PHP variables for MySQL query,"['php', 'mysql']","{php, mysql}"
2,'Counting' the number of records that match a ...,"['php', 'mysql']","{php, mysql}"
3,Insert new row in a table and auto id number. ...,"['php', 'mysql']","{php, mysql}"
4,Create Multiple MySQL tables using PHP,"['php', 'mysql']","{php, mysql}"


In [19]:
df.dtypes

title       object
tags        object
new_tags    object
dtype: object

In [20]:
# Using Multilabel Binarizer
from sklearn.preprocessing import MultiLabelBinarizer

In [21]:
# Create an instance
mlb = MultiLabelBinarizer()

In [23]:
mlb.fit_transform(df['new_tags'])

array([[1, 0, 1],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 0, 1],
       [1, 0, 1],
       [1, 1, 0],
       [1, 0, 1],
       [1, 0, 1],
       [1, 0, 1],
       [1, 1, 0],
       [1, 0, 1],
       [1, 0, 1],
       [1, 0, 1],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 0, 1],
       [1, 1, 0],
       [1, 1, 0],
       [1, 0, 1],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 0, 1],
       [1, 0, 1],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 0, 1],
       [1, 0, 1],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 0, 1],
       [1, 0, 1],
       [1, 1, 0],
       [1, 0, 1],
       [1, 0, 1],
       [1, 0, 1],
       [1, 1, 0],
       [1, 0, 1],
       [1, 1, 0],
       [1, 1, 0],
       [1, 0, 1],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1,

In [24]:
# Get classes
mlb.classes_

array(['mysql', 'php', 'python'], dtype=object)

In [25]:
pd.DataFrame(mlb.fit_transform(df['new_tags']),columns=mlb.classes_,index=df.index)

Unnamed: 0,mysql,php,python
0,1,0,1
1,1,1,0
2,1,1,0
3,1,1,0
4,1,1,0
...,...,...,...
139,1,0,1
140,1,0,1
141,1,1,0
142,1,1,0


In [26]:
df2 = df.join(pd.DataFrame(mlb.fit_transform(df['new_tags']),columns=mlb.classes_,index=df.index))

In [27]:
df2

Unnamed: 0,title,tags,new_tags,mysql,php,python
0,Flask-SQLAlchemy - When are the tables/databas...,"['python', 'mysql']","{python, mysql}",1,0,1
1,Combining two PHP variables for MySQL query,"['php', 'mysql']","{php, mysql}",1,1,0
2,'Counting' the number of records that match a ...,"['php', 'mysql']","{php, mysql}",1,1,0
3,Insert new row in a table and auto id number. ...,"['php', 'mysql']","{php, mysql}",1,1,0
4,Create Multiple MySQL tables using PHP,"['php', 'mysql']","{php, mysql}",1,1,0
...,...,...,...,...,...,...
139,"Executing ""SELECT ... WHERE ... IN ..."" using ...","['python', 'mysql']","{python, mysql}",1,0,1
140,SQLAlchemy reconnect to db,"['python', 'mysql']","{python, mysql}",1,0,1
141,mysql Count Distinct and get result using php,"['php', 'mysql']","{php, mysql}",1,1,0
142,How to store the result of a radio button with...,"['php', 'mysql']","{php, mysql}",1,1,0


In [28]:
### Thanks For Watching
### Jesus saves @JCharisTech
### Jesse E.Agbe(JCharis)