In [1]:
# Importing the required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# Load the GenZ Marketing dataset from its CSV file
data = pd.read_csv(filepath_or_buffer='genzmarketing_data.csv')
data.head()  # Preview the first 5 rows (head() defaults to n=5)

Unnamed: 0,title,url,content
0,The Neurodivergent Business Consortium (NB...,https://genzmarketing.xyz/blog/the-neurodiverg...,Loading... I was a very odd child to many peop...
1,,https://genzmarketing.xyz/service-CT,Loading... We know training and specific subje...
2,Cold Email Marketing| Portfolio\r\n,https://genzmarketing.xyz/portfolio/category/e...,Loading... Why is cold email marketing servic...
3,Video Advertisement| Portfolio\r\n,https://genzmarketing.xyz/portfolio/category/e...,Loading... Theme-based Video Advertisement F...
4,How Lead Generation Works? ( GenZMarketing...,https://genzmarketing.xyz/blog/how-lead-genera...,Loading... Lead generation is the process of a...


In [4]:
data.info() # Summarize the dataset: index range, column names, non-null counts and dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    35 non-null     object
 1   url      41 non-null     object
 2   content  41 non-null     object
dtypes: object(3)
memory usage: 1.1+ KB


In [5]:
# Pass `data` through the DataFrame constructor
# (pd.read_csv already returned a DataFrame, so the contents are unchanged)
data = pd.DataFrame(data=data)

In [6]:
data.head()  # Preview the first 5 rows (head() defaults to n=5)

Unnamed: 0,title,url,content
0,The Neurodivergent Business Consortium (NB...,https://genzmarketing.xyz/blog/the-neurodiverg...,Loading... I was a very odd child to many peop...
1,,https://genzmarketing.xyz/service-CT,Loading... We know training and specific subje...
2,Cold Email Marketing| Portfolio\r\n,https://genzmarketing.xyz/portfolio/category/e...,Loading... Why is cold email marketing servic...
3,Video Advertisement| Portfolio\r\n,https://genzmarketing.xyz/portfolio/category/e...,Loading... Theme-based Video Advertisement F...
4,How Lead Generation Works? ( GenZMarketing...,https://genzmarketing.xyz/blog/how-lead-genera...,Loading... Lead generation is the process of a...


In [11]:
# Flag every row that repeats an earlier row in all columns, then count the flags
duplicates = data.duplicated(keep='first')
num_duplicates = duplicates.sum()
print(f"Number of duplicate rows: {num_duplicates}")

Number of duplicate rows: 0


In [12]:
# Count the missing (NaN) entries in each column
missing_values = data.isna().sum(axis=0)
print(missing_values)

title      6
url        0
content    0
dtype: int64


In [13]:
# Select every row that has a missing value in at least one column
missing_rows = data.loc[data.isna().any(axis='columns')]
print(missing_rows)

   title                                    url  \
1    NaN   https://genzmarketing.xyz/service-CT   
16   NaN  https://genzmarketing.xyz/service-SMC   
24   NaN  https://genzmarketing.xyz/service-CeM   
25   NaN   https://genzmarketing.xyz/service-CC   
26   NaN  https://genzmarketing.xyz/service-PRC   
39   NaN  https://genzmarketing.xyz/service-SMM   

                                              content  
1   Loading... We know training and specific subje...  
16  Loading... GenZ researches and understands you...  
24  Loading... GenZ has a dedicated cold email app...  
25  Loading... GenZ has experienced and dedicated ...  
26  Loading... GenZ’s public relations (PR) and co...  
39  Loading... GenZ’s dedicated LinkedIn project m...  


In [14]:
# Replace missing titles with the placeholder "Untitled"; existing titles are kept
data['title'] = data['title'].mask(data['title'].isna(), "Untitled")

In [17]:
data.info() # Re-check the non-null counts now that missing titles have been filled

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    41 non-null     object
 1   url      41 non-null     object
 2   content  41 non-null     object
dtypes: object(3)
memory usage: 1.1+ KB


In [19]:
# Delete pipes, angle brackets, carriage returns and line feeds
# from the 'title' and 'content' columns in one column-wise pass
data[['title', 'content']] = data[['title', 'content']].apply(
    lambda text: text.str.replace(r'[|<>\r\n]', '', regex=True)
)

# Preview the cleaned dataset
data.head()

Unnamed: 0,title,url,content
0,The Neurodivergent Business Consortium (NB...,https://genzmarketing.xyz/blog/the-neurodiverg...,Loading... I was a very odd child to many peop...
1,Untitled,https://genzmarketing.xyz/service-CT,Loading... We know training and specific subje...
2,Cold Email Marketing Portfolio,https://genzmarketing.xyz/portfolio/category/e...,Loading... Why is cold email marketing servic...
3,Video Advertisement Portfolio,https://genzmarketing.xyz/portfolio/category/e...,Loading... Theme-based Video Advertisement F...
4,How Lead Generation Works? ( GenZMarketing ),https://genzmarketing.xyz/blog/how-lead-genera...,Loading... Lead generation is the process of a...


In [20]:
# Trim leading and trailing whitespace from both text columns
data[['content', 'title']] = data[['content', 'title']].apply(
    lambda text: text.str.strip()
)

In [24]:
# Print the whole 'title' column
print(data.loc[:, 'title'])

0     The Neurodivergent Business Consortium (NBC) p...
1                                              Untitled
2                        Cold Email Marketing Portfolio
3                         Video Advertisement Portfolio
4          How Lead Generation Works? ( GenZMarketing )
5                        Competitive Analysis Portfolio
6                                             Portfolio
7     Instagram Metrics You Should Be Tracking ( Gen...
8                                    Branding Portfolio
9      Is brand engagement necessary? ( GenZMarketing )
10    Factors that Affect Portfolio Allocation ( Gen...
11    Best SEO Agency for Higher Education ( GenZMar...
12                    Social Media Management Portfolio
13    How ChatGPT can help you to write the company ...
14                                          Get Started
15                       Training & Mentoring Portfolio
16                                             Untitled
17                        PR & Communications Po

In [25]:
# Print the full title of the row labelled 13 (the column printout truncates long titles)
print(data.at[13, 'title'])

How ChatGPT can help you to write the company and personal content? ( GenZMarketing )


In [31]:
# Delete every literal '( GenZMarketing )' occurrence from the 'title' column
# (the parentheses are escaped so the regex treats them as plain characters)
data['title'] = data['title'].str.replace(pat=r'\( GenZMarketing \)', repl='', regex=True)
data.loc[:, 'title']


0     The Neurodivergent Business Consortium (NBC) p...
1                                              Untitled
2                        Cold Email Marketing Portfolio
3                         Video Advertisement Portfolio
4                           How Lead Generation Works? 
5                        Competitive Analysis Portfolio
6                                             Portfolio
7             Instagram Metrics You Should Be Tracking 
8                                    Branding Portfolio
9                       Is brand engagement necessary? 
10            Factors that Affect Portfolio Allocation 
11                Best SEO Agency for Higher Education 
12                    Social Media Management Portfolio
13    How ChatGPT can help you to write the company ...
14                                          Get Started
15                       Training & Mentoring Portfolio
16                                             Untitled
17                        PR & Communications Po

In [32]:
# Print the title of the row labelled 4 to inspect the result of the replacement
print(data.at[4, 'title'])

How Lead Generation Works? 


In [33]:
# Print the whole 'content' column
print(data.loc[:, 'content'])

0     Loading... I was a very odd child to many peop...
1     Loading... We know training and specific subje...
2     Loading...  Why is cold email marketing servic...
3     Loading...  Theme-based Video Advertisement  F...
4     Loading... Lead generation is the process of a...
5     Loading...  Understand the market and your com...
6     Loading... Social Media Optimization (One Time...
7     Loading... Instagram analytics will give your ...
8     Loading...  Logo, branding and post design  Br...
9     Loading... Brand engagement refers to the proc...
10    Loading... Portfolio allocation refers to the ...
11    Loading... SEO is crucial for higher education...
12    Loading...  Account management & Lead Gen Shah...
13    Loading... ChatGPT is one of the most talked a...
14    Loading... A Discovery Consultation will help ...
15    Loading...  Empowering Your Team with Training...
16    Loading... GenZ researches and understands you...
17    Loading...  Why PR & Communication is Impo

In [34]:
# Print the full content of the row labelled 1
print(data.at[1, 'content'])

Loading... We know training and specific subject knowledge is the core of personal development and we have the experience to develop training materials for our clients and target audience. Workshops and training are popular, but they aren't always memorable, even when your audience is learning something new and the pandemic has caused an epidemic of Zoom fatigue. Many people find it difficult to listen to others drone on. But, thankfully, you can deliver memorable workshops and training seminars with just a little knowledge. We administrate different types of cold emails campaigns based our clients necessaty and each of them has its way of tergating customer and success. You can refine the message you want to deliver once you know who your audience is. This will be your event's overarching theme, which will inform the topic. Write down in two or three sentences what you want attendees to learn, think, and feel when they come to your event. Then, make sure that everything you do is in l

In [35]:
# Remove the 'Loading...' text from the 'content' column.
# regex=False makes the match literal. Used as a regex pattern, each '.' is a
# wildcard, so "Loading" followed by ANY three characters would be deleted
# (e.g. "Loading speed" would be cut down to "eed").
data['content'] = data['content'].str.replace('Loading...', '', regex=False)
data['content']


0      I was a very odd child to many people in my c...
1      We know training and specific subject knowled...
2       Why is cold email marketing service importan...
3       Theme-based Video Advertisement  Faster than...
4      Lead generation is the process of attracting ...
5       Understand the market and your competitors S...
6      Social Media Optimization (One Time)         ...
7      Instagram analytics will give your employees ...
8       Logo, branding and post design  Branding for...
9      Brand engagement refers to the process of for...
10     Portfolio allocation refers to the process of...
11     SEO is crucial for higher education instituti...
12      Account management & Lead Gen Shah is a forc...
13     ChatGPT is one of the most talked about topic...
14     A Discovery Consultation will help you to und...
15      Empowering Your Team with Training and naili...
16     GenZ researches and understands your market t...
17      Why PR & Communication is Important? Sha

In [36]:
# Print the content of the row labelled 1 (the second row of the dataset)
print(data.at[1, 'content'])

 We know training and specific subject knowledge is the core of personal development and we have the experience to develop training materials for our clients and target audience. Workshops and training are popular, but they aren't always memorable, even when your audience is learning something new and the pandemic has caused an epidemic of Zoom fatigue. Many people find it difficult to listen to others drone on. But, thankfully, you can deliver memorable workshops and training seminars with just a little knowledge. We administrate different types of cold emails campaigns based our clients necessaty and each of them has its way of tergating customer and success. You can refine the message you want to deliver once you know who your audience is. This will be your event's overarching theme, which will inform the topic. Write down in two or three sentences what you want attendees to learn, think, and feel when they come to your event. Then, make sure that everything you do is in line with t

In [37]:
# Collapse every run of whitespace into a single space, then trim both ends,
# for the 'title' and 'content' columns
data[['title', 'content']] = data[['title', 'content']].apply(
    lambda text: text.str.replace(r'\s+', ' ', regex=True).str.strip()
)

# Preview the cleaned dataset
data.head()

Unnamed: 0,title,url,content
0,The Neurodivergent Business Consortium (NBC) p...,https://genzmarketing.xyz/blog/the-neurodiverg...,I was a very odd child to many people in my ch...
1,Untitled,https://genzmarketing.xyz/service-CT,We know training and specific subject knowledg...
2,Cold Email Marketing Portfolio,https://genzmarketing.xyz/portfolio/category/e...,Why is cold email marketing service important?...
3,Video Advertisement Portfolio,https://genzmarketing.xyz/portfolio/category/e...,Theme-based Video Advertisement Faster than po...
4,How Lead Generation Works?,https://genzmarketing.xyz/blog/how-lead-genera...,Lead generation is the process of attracting p...


In [38]:
# Display the 'title' column after whitespace normalization
data.loc[:, 'title']

0     The Neurodivergent Business Consortium (NBC) p...
1                                              Untitled
2                        Cold Email Marketing Portfolio
3                         Video Advertisement Portfolio
4                            How Lead Generation Works?
5                        Competitive Analysis Portfolio
6                                             Portfolio
7              Instagram Metrics You Should Be Tracking
8                                    Branding Portfolio
9                        Is brand engagement necessary?
10             Factors that Affect Portfolio Allocation
11                 Best SEO Agency for Higher Education
12                    Social Media Management Portfolio
13    How ChatGPT can help you to write the company ...
14                                          Get Started
15                       Training & Mentoring Portfolio
16                                             Untitled
17                        PR & Communications Po

In [39]:
# Display the 'content' column after whitespace normalization
data.loc[:, 'content']

0     I was a very odd child to many people in my ch...
1     We know training and specific subject knowledg...
2     Why is cold email marketing service important?...
3     Theme-based Video Advertisement Faster than po...
4     Lead generation is the process of attracting p...
5     Understand the market and your competitors Sha...
6     Social Media Optimization (One Time) Content &...
7     Instagram analytics will give your employees a...
8     Logo, branding and post design Branding for Kw...
9     Brand engagement refers to the process of form...
10    Portfolio allocation refers to the process of ...
11    SEO is crucial for higher education institutio...
12    Account management & Lead Gen Shah is a force ...
13    ChatGPT is one of the most talked about topics...
14    A Discovery Consultation will help you to unde...
15    Empowering Your Team with Training and nailing...
16    GenZ researches and understands your market to...
17    Why PR & Communication is Important? Shah 

In [40]:
# Write the cleaned frame to CSV; index=False keeps the row index out of the file
cleaned_data = pd.DataFrame(data=data)
cleaned_data.to_csv(path_or_buf='cleaned_genzmarketing_data.csv', index=False)
print("Data cleaning complete. Saved as cleaned_genzmarketing_data.csv.")

Data cleaning complete. Saved as cleaned_genzmarketing_data.csv.


In [42]:
# Reload the saved CSV and preview it
cleaned_data = pd.read_csv(filepath_or_buffer='cleaned_genzmarketing_data.csv')
cleaned_data.head()  # Preview the first 5 rows (head() defaults to n=5)

Unnamed: 0,title,url,content
0,The Neurodivergent Business Consortium (NBC) p...,https://genzmarketing.xyz/blog/the-neurodiverg...,I was a very odd child to many people in my ch...
1,Untitled,https://genzmarketing.xyz/service-CT,We know training and specific subject knowledg...
2,Cold Email Marketing Portfolio,https://genzmarketing.xyz/portfolio/category/e...,Why is cold email marketing service important?...
3,Video Advertisement Portfolio,https://genzmarketing.xyz/portfolio/category/e...,Theme-based Video Advertisement Faster than po...
4,How Lead Generation Works?,https://genzmarketing.xyz/blog/how-lead-genera...,Lead generation is the process of attracting p...
