In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the JavaScript tutorial dataset into a DataFrame, then show its
# first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='javascript_tutorial_data.csv')
df.head(n=5)

Unnamed: 0,Module,Subtopic,Subtopic Link,Content
0,Section 1. Array properties,length property,https://www.javascripttutorial.net/javascript-...,"Summary: in this tutorial, you’ll learn about ..."
1,Section 2. Adding/removing elements,push(),https://www.javascripttutorial.net/javascript-...,"Summary: in this tutorial, you’ll learn how to..."
2,Section 2. Adding/removing elements,unshift(),https://www.javascripttutorial.net/javascript-...,"Summary: in this tutorial, you’ll learn how to..."
3,Section 2. Adding/removing elements,pop(),https://www.javascripttutorial.net/javascript-...,"Summary: in this tutorial, you’ll learn how to..."
4,Section 2. Adding/removing elements,shift(),https://www.javascripttutorial.net/javascript-...,"Summary: in this tutorial, you’ll learn how to..."


## Data Wrangling

In [3]:
# Count the missing entries in each column
df.isna().sum()

Module           0
Subtopic         0
Subtopic Link    0
Content          0
dtype: int64

In [4]:
# Drop the feature with links as it is not useful.
# `df` is rebound instead of mutated with inplace=True, and errors='ignore'
# makes the call a no-op when the column is already gone, so re-running
# this cell does not raise a KeyError.
df = df.drop(columns='Subtopic Link', errors='ignore')

In [5]:
# Extract section numbers from Module and create new column.
# expand=False returns a Series for the single capture group instead of a
# one-column DataFrame.
section = df['Module'].str.extract(r'Section (\d+)', expand=False)
if 'Section' in df.columns:
    # This cell strips the 'Section X.' prefix from Module further down, so on
    # a re-run nothing matches any more. Fall back to the numbers extracted
    # earlier instead of overwriting the column with NaN.
    section = section.fillna(df['Section'])
df['Section'] = section

# Clean Module column by removing 'Section X.' prefix
df['Module'] = df['Module'].str.replace(r'Section \d+\.\s*', '', regex=True)

# Clean Content by removing 'Summary:' prefix (anchored to the start of the text)
df['Content'] = df['Content'].str.replace(r'^Summary:\s*', '', regex=True)

# Verify changes
print("Sample results:")
print("\nSection:", df['Section'].head())
print("\nCleaned Module:", df['Module'].head())
print("\nCleaned Content:", df['Content'].head())

Sample results:

Section: 0    1
1    2
2    2
3    2
4    2
Name: Section, dtype: object

Cleaned Module: 0            Array properties
1    Adding/removing elements
2    Adding/removing elements
3    Adding/removing elements
4    Adding/removing elements
Name: Module, dtype: object

Cleaned Content: 0    in this tutorial, you’ll learn about the JavaS...
1    in this tutorial, you’ll learn how to use the ...
2    in this tutorial, you’ll learn how to use the ...
3    in this tutorial, you’ll learn how to use the ...
4    in this tutorial, you’ll learn how to use the ...
Name: Content, dtype: object


In [6]:
# Show the first five rows of the frame after cleaning
df.head(n=5)

Unnamed: 0,Module,Subtopic,Content,Section
0,Array properties,length property,"in this tutorial, you’ll learn about the JavaS...",1
1,Adding/removing elements,push(),"in this tutorial, you’ll learn how to use the ...",2
2,Adding/removing elements,unshift(),"in this tutorial, you’ll learn how to use the ...",2
3,Adding/removing elements,pop(),"in this tutorial, you’ll learn how to use the ...",2
4,Adding/removing elements,shift(),"in this tutorial, you’ll learn how to use the ...",2


In [7]:
# Write the cleaned frame to a new CSV file; index=False leaves the row index out
df.to_csv(path_or_buf='cleaned_javascript_tutorial_data.csv', index=False)