## Importing Pandas Library

In [1]:
import pandas as pd

## Importing data file in the Python environment
Change the Windows folder location as per your system 

In [2]:
#### Importing Data
imp_data = pd.read_csv("C:\\Ujjwal\\Analytics\\Git\\Scenario_4\\Sample_Data.csv")
imp_data.head(2)

Unnamed: 0,Article_Number,Sentence_Number,Article_Publish_Date,Keyword,Article_Body,Article_Keywords
0,0,1,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"cirque du soleil, a agreement, horse bid, ente..."
1,0,2,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"stake, cirque, consortium proposal"


## Converting Article_keywords into a list

In [3]:
#### Converting keywords into a list structure
imp_data["Article_Keywords_Converted"] = imp_data["Article_Keywords"].str.split(",")
imp_data.loc[0,"Article_Keywords_Converted"]

['cirque du soleil',
 ' a agreement',
 ' horse bid',
 ' entertainment group',
 ' a deal',
 ' capital',
 ' court heard',
 ' friday']

## Solution - Pandas Version before 0.25.0

#### Step 1 - Convert the column containing list object to series

In [4]:
#### Converting each list value to series
temp_df = imp_data["Article_Keywords_Converted"].apply(pd.Series)
temp_df.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,60,61,62,63,64,65,66,67,68,69
0,cirque du soleil,a agreement,horse bid,entertainment group,a deal,capital,court heard,friday,,,...,,,,,,,,,,
1,stake,cirque,consortium proposal,,,,,,,,...,,,,,,,,,,


#### Step 2 - Merge temporary dataframe with original one

In [5]:
#### Merging with temporary dataframe
imp_data = pd.merge(imp_data, temp_df, right_index=True, left_index=True, how = "left")
imp_data.head(2)

Unnamed: 0,Article_Number,Sentence_Number,Article_Publish_Date,Keyword,Article_Body,Article_Keywords,Article_Keywords_Converted,0,1,2,...,60,61,62,63,64,65,66,67,68,69
0,0,1,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"cirque du soleil, a agreement, horse bid, ente...","[cirque du soleil, a agreement, horse bid, ...",cirque du soleil,a agreement,horse bid,...,,,,,,,,,,
1,0,2,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"stake, cirque, consortium proposal","[stake, cirque, consortium proposal]",stake,cirque,consortium proposal,...,,,,,,,,,,


#### Step 3 - Melt the columns of Temporary dataframe (which are now part of the original dataframe) 

In [6]:
#### Reshaping data
imp_data = pd.melt(imp_data, 
                   id_vars=['Article_Number','Sentence_Number',
                            'Article_Publish_Date','Keyword',
                            'Article_Body','Article_Keywords',
                            'Article_Keywords_Converted'],
                  value_name="Keywords")
imp_data.head(2)

Unnamed: 0,Article_Number,Sentence_Number,Article_Publish_Date,Keyword,Article_Body,Article_Keywords,Article_Keywords_Converted,variable,Keywords
0,0,1,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"cirque du soleil, a agreement, horse bid, ente...","[cirque du soleil, a agreement, horse bid, ...",0,cirque du soleil
1,0,2,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"stake, cirque, consortium proposal","[stake, cirque, consortium proposal]",0,stake


#### Step 4 - Drop records with null values in keyword field and undesired columns

In [7]:
#### Dropping null values
imp_data.dropna(inplace = True)
imp_data.drop(["Article_Keywords", "Article_Keywords_Converted", "variable"], axis = 1, inplace=True)
imp_data.head(2)

Unnamed: 0,Article_Number,Sentence_Number,Article_Publish_Date,Keyword,Article_Body,Keywords
0,0,1,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,cirque du soleil
1,0,2,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,stake


## Solution - Pandas Version 0.25.0 and Above 

In [8]:
#### One step solution - Explode
imp_data = pd.read_csv("C:\\Ujjwal\\Analytics\\Git\\Scenario_4\\Sample_Data.csv")
imp_data["Article_Keywords_Converted"] = imp_data["Article_Keywords"].str.split(",")
imp_data = imp_data.explode("Article_Keywords_Converted")
imp_data.head()

Unnamed: 0,Article_Number,Sentence_Number,Article_Publish_Date,Keyword,Article_Body,Article_Keywords,Article_Keywords_Converted
0,0,1,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"cirque du soleil, a agreement, horse bid, ente...",cirque du soleil
0,0,1,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"cirque du soleil, a agreement, horse bid, ente...",a agreement
0,0,1,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"cirque du soleil, a agreement, horse bid, ente...",horse bid
0,0,1,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"cirque du soleil, a agreement, horse bid, ente...",entertainment group
0,0,1,7/10/2020,filed bankruptcy,cirque du soleil and its secured creditors ar...,"cirque du soleil, a agreement, horse bid, ente...",a deal
