In [1]:
import pandas as pd
import os
import pathlib

### Open the CSV directory and load all CSV files

In [2]:
def load_data_csv(path):
    """Yield the path of every CSV file found under ``path``, recursively.

    The tree is walked top-down; sub-directories and file names are visited
    in sorted order so the yielded sequence is the same on every run instead
    of depending on the filesystem's listing order.

    Args:
        path: Directory to search.

    Yields:
        str: ``os.path.join(root, name)`` for each file whose name ends in
        ``.csv`` (compared case-insensitively, so ``.CSV`` also matches).

    Raises:
        ValueError: If ``path`` does not exist, or if an error occurs while
            walking it. Because this is a generator, the error surfaces on
            first iteration rather than at call time.
    """
    if not os.path.exists(path):
        raise ValueError("Error: path does not exist")

    try:
        for root, dirs, files in os.walk(path):
            # Sorting in place fixes the order in which os.walk descends.
            dirs.sort()
            for name in sorted(files):
                if name.lower().endswith(".csv"):
                    yield os.path.join(root, name)
    except Exception as e:
        # Keep the original traceback attached for easier debugging.
        raise ValueError(f"Error loading files due to: {e}") from e

In [23]:
# Load every CSV found under ./csv_dir into a single DataFrame named `ads`.
# `ads` is bound to an empty frame first so that it always exists afterwards,
# even when loading fails, and a failed re-run cannot leave a stale frame
# from an earlier execution in place.
ads = pd.DataFrame()
all_dfs = []
try:
    # load_data_csv is the generator defined in an earlier cell of this notebook.
    for csv_file in load_data_csv("./csv_dir"):
        try:
            df = pd.read_csv(csv_file, on_bad_lines='skip')  # Skip malformed rows
            # Treat empty strings as missing, then remove rows with missing values.
            df = df.replace("", pd.NA).dropna()
            all_dfs.append(df)
        except Exception as inner_e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"⚠️ Skipping file {csv_file} due to read error: {inner_e}")

    if not all_dfs:
        print("⚠️ No valid CSV files found or all files failed.")
    else:
        ads = pd.concat(all_dfs, ignore_index=True)
        print(ads.head())

except Exception as e:
    print(f"❌ Error occurred during dataset loading: {e}")


                                            main_url  \
0  https://www.linkedin.com/jobs/search?keywords=...   
1  https://www.linkedin.com/jobs/search?keywords=...   
2  https://www.linkedin.com/jobs/search?keywords=...   
3  https://www.linkedin.com/jobs/search?keywords=...   
4  https://www.linkedin.com/jobs/search?keywords=...   

                                              title  \
0                               Front-End Developer   
1                                React JS Developer   
2                              MERN Stack Developer   
3  Frontend Engineer I (with Contentful experience)   
4                                 Frontend Engineer   

                                                 url company_name  \
0  https://pk.linkedin.com/jobs/view/front-end-de...    Spiralyze   
1  https://pk.linkedin.com/jobs/view/react-js-dev...     PieCyfer   
2  https://pk.linkedin.com/jobs/view/mern-stack-d...      Devsinc   
3  https://pk.linkedin.com/jobs/view/frontend-eng...    

#### Clean dataset

In [4]:
# Display the combined frame (the recorded output elides the middle rows).
ads

Unnamed: 0,main_url,title,url,company_name,posted_time,location
0,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
1,https://www.linkedin.com/jobs/search?keywords=...,React JS Developer,https://pk.linkedin.com/jobs/view/react-js-dev...,PieCyfer,2 months ago,"Lahore, Punjab, Pakistan"
2,https://www.linkedin.com/jobs/search?keywords=...,MERN Stack Developer,https://pk.linkedin.com/jobs/view/mern-stack-d...,Devsinc,3 days ago,"Lahore, Punjab, Pakistan"
3,https://www.linkedin.com/jobs/search?keywords=...,Frontend Engineer I (with Contentful experience),https://pk.linkedin.com/jobs/view/frontend-eng...,Outliant,3 days ago,"Lahore, Punjab, Pakistan"
4,https://www.linkedin.com/jobs/search?keywords=...,Frontend Engineer,https://pk.linkedin.com/jobs/view/frontend-eng...,ClearGrid,1 month ago,"Lahore, Punjab, Pakistan"
...,...,...,...,...,...,...
2264,https://www.linkedin.com/jobs/search?keywords=...,Senior Java Developer,https://pk.linkedin.com/jobs/view/senior-java-...,Soliton Technologies,2 weeks ago,"Lahore, Punjab, Pakistan"
2265,https://www.linkedin.com/jobs/search?keywords=...,Oracle Apex Developer (Onsite Lahore),https://pk.linkedin.com/jobs/view/oracle-apex-...,Biztech Minds,2 weeks ago,"Lahore, Punjab, Pakistan"
2266,https://www.linkedin.com/jobs/search?keywords=...,Oracle Technical Developer Lead Consultant,https://pk.linkedin.com/jobs/view/oracle-techn...,GenZ Talent,1 week ago,"Lahore, Punjab, Pakistan"
2267,https://www.linkedin.com/jobs/search?keywords=...,Full Stack JAVA Developer (FinTech),https://pk.linkedin.com/jobs/view/full-stack-j...,Nakisa,1 day ago,"Lahore, Punjab, Pakistan"


In [29]:
# Drop repeated postings (same title, company, location and URL), then any
# rows that contain missing values. The result is rebound to `ads` rather than
# mutated with inplace=True, so a frame object displayed by an earlier cell is
# not changed behind the reader's back.
ads = (
    ads
    .drop_duplicates(subset=['title', 'company_name', 'location', 'url'])
    .dropna()
)
ads
 

Unnamed: 0,main_url,title,url,company_name,posted_time,location
0,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
1,https://www.linkedin.com/jobs/search?keywords=...,React JS Developer,https://pk.linkedin.com/jobs/view/react-js-dev...,PieCyfer,2 months ago,"Lahore, Punjab, Pakistan"
2,https://www.linkedin.com/jobs/search?keywords=...,MERN Stack Developer,https://pk.linkedin.com/jobs/view/mern-stack-d...,Devsinc,3 days ago,"Lahore, Punjab, Pakistan"
3,https://www.linkedin.com/jobs/search?keywords=...,Frontend Engineer I (with Contentful experience),https://pk.linkedin.com/jobs/view/frontend-eng...,Outliant,3 days ago,"Lahore, Punjab, Pakistan"
4,https://www.linkedin.com/jobs/search?keywords=...,Frontend Engineer,https://pk.linkedin.com/jobs/view/frontend-eng...,ClearGrid,1 month ago,"Lahore, Punjab, Pakistan"
...,...,...,...,...,...,...
2218,https://www.linkedin.com/jobs/search?keywords=...,Senior Software Engineer - React JS,https://pk.linkedin.com/jobs/view/senior-softw...,Dubizzle Labs,2 weeks ago,"Lahore, Punjab, Pakistan"
2244,https://www.linkedin.com/jobs/search?keywords=...,Sr. Full Stack Developer,https://pk.linkedin.com/jobs/view/sr-full-stac...,Xcentric Services | Web & App Development Comp...,1 day ago,"Lahore, Punjab, Pakistan"
2246,https://www.linkedin.com/jobs/search?keywords=...,Senior Software Engineer (Ruby/Scala + TypeScr...,https://pk.linkedin.com/jobs/view/senior-softw...,TechieMinions,2 weeks ago,"Lahore, Punjab, Pakistan"
2259,https://www.linkedin.com/jobs/search?keywords=...,Senior Software Engineer - Laravel,https://pk.linkedin.com/jobs/view/senior-softw...,Dubizzle Labs,2 weeks ago,"Lahore, Punjab, Pakistan"


In [6]:
# Count how many rows share each `main_url` value.
ads.groupby("main_url").size()


main_url
https://www.linkedin.com/jobs/search?keywords=Data%20Analysis&location=Lahore&geoId=104112529&distance=25&f_JT=F&f_E=2%2C3%2C4&f_PP=104112529&f_TPR=&f_WT=1&position=1&pageNum=0    1077
https://www.linkedin.com/jobs/search?keywords=Python%20Fastapi&location=Lahore&geoId=104112529&distance=25&f_TPR=&f_WT=1&f_PP=104112529&position=1&pageNum=0                         115
https://www.linkedin.com/jobs/search?keywords=React%20Js&location=Lahore&geoId=104112529&distance=25&f_TPR=&position=1&pageNum=0                                                    1077
dtype: int64

In [7]:
# Keep only the first row seen for each distinct `main_url`.
# NOTE(review): this rebinds `ads` itself, so every later cell that reads `ads`
# sees just one row per `main_url` (three rows in the recorded output). If this
# was meant as a quick sample, bind it to a new name instead — confirm intent.
ads = ads.loc[~ads.duplicated(subset="main_url", keep="first")]
ads

Unnamed: 0,main_url,title,url,company_name,posted_time,location
0,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
95,https://www.linkedin.com/jobs/search?keywords=...,Senior Python Developer,https://pk.linkedin.com/jobs/view/senior-pytho...,Devsinc,3 weeks ago,"Lahore, Punjab, Pakistan"
108,https://www.linkedin.com/jobs/search?keywords=...,Associate Business Analyst,https://pk.linkedin.com/jobs/view/associate-bu...,Devsinc,3 weeks ago,"Lahore, Punjab, Pakistan"


In [8]:
# Select the rows whose `posted_time` text is exactly "1 day ago".
filtered_df = ads.loc[ads["posted_time"].eq("1 day ago")]
filtered_df

Unnamed: 0,main_url,title,url,company_name,posted_time,location


### Check a sample job against the scraper's combined data

In [9]:
# Display `ads` again (three rows in the recorded output).
ads

Unnamed: 0,main_url,title,url,company_name,posted_time,location
0,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
95,https://www.linkedin.com/jobs/search?keywords=...,Senior Python Developer,https://pk.linkedin.com/jobs/view/senior-pytho...,Devsinc,3 weeks ago,"Lahore, Punjab, Pakistan"
108,https://www.linkedin.com/jobs/search?keywords=...,Associate Business Analyst,https://pk.linkedin.com/jobs/view/associate-bu...,Devsinc,3 weeks ago,"Lahore, Punjab, Pakistan"


In [10]:
from linkedin_scrap import WebScraper

In [11]:
# Create the scraper instance; per the recorded output, construction prints a
# config-validation message.
ws=WebScraper()

✅ Config loaded and validated successfully!


In [12]:
# Hand-written sample job record used to exercise the scraper helpers below.
# The URL keeps its "&amp;" entities and the company name keeps its trailing
# space exactly as written.
tdata = [
    dict(
        title="Front End UI Developer",
        url="https://pk.linkedin.com/jobs/view/front-end-ui-developer-at-info-resume-edge-gcc-4226293092?position=4&amp;pageNum=0&amp;refId=l1n68mcGur9wgf9Z%2BQALrQ%3D%3D&amp;trackingId=KyekSneG2B69rmkd5UuedQ%3D%3D",
        company_name="Info Resume Edge - GCC ",
        posted_time="2 weeks ago",
        location="Lahore, Punjab, Pakistan",
    ),
]


In [13]:
# NOTE(review): the recorded output repeats the message already printed when
# WebScraper() was constructed, so this call may be redundant — confirm
# against linkedin_scrap.
ws.init_configs()


✅ Config loaded and validated successfully!


In [14]:
# Fetch the scraper's combined data (rendered as a table by the cells below).
cd=ws.get_combine_data()


In [15]:
# Inspect the combined data returned by the scraper.
cd

Unnamed: 0,main_url,title,url,company_name,posted_time,location
0,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
1,https://www.linkedin.com/jobs/search?keywords=...,React JS Developer,https://pk.linkedin.com/jobs/view/react-js-dev...,PieCyfer,2 months ago,"Lahore, Punjab, Pakistan"
2,https://www.linkedin.com/jobs/search?keywords=...,MERN Stack Developer,https://pk.linkedin.com/jobs/view/mern-stack-d...,Devsinc,3 days ago,"Lahore, Punjab, Pakistan"
3,https://www.linkedin.com/jobs/search?keywords=...,Frontend Engineer I (with Contentful experience),https://pk.linkedin.com/jobs/view/frontend-eng...,Outliant,3 days ago,"Lahore, Punjab, Pakistan"
4,https://www.linkedin.com/jobs/search?keywords=...,Frontend Engineer,https://pk.linkedin.com/jobs/view/frontend-eng...,ClearGrid,1 month ago,"Lahore, Punjab, Pakistan"
...,...,...,...,...,...,...
3557,https://www.linkedin.com/jobs/search?keywords=...,Senior Java Developer,https://pk.linkedin.com/jobs/view/senior-java-...,Soliton Technologies,2 weeks ago,"Lahore, Punjab, Pakistan"
3558,https://www.linkedin.com/jobs/search?keywords=...,Oracle Apex Developer (Onsite Lahore),https://pk.linkedin.com/jobs/view/oracle-apex-...,Biztech Minds,2 weeks ago,"Lahore, Punjab, Pakistan"
3559,https://www.linkedin.com/jobs/search?keywords=...,Oracle Technical Developer Lead Consultant,https://pk.linkedin.com/jobs/view/oracle-techn...,GenZ Talent,1 week ago,"Lahore, Punjab, Pakistan"
3562,https://www.linkedin.com/jobs/search?keywords=...,Full Stack JAVA Developer (FinTech),https://pk.linkedin.com/jobs/view/full-stack-j...,Nakisa,1 day ago,"Lahore, Punjab, Pakistan"


In [16]:
# Show the single sample record.
tdata[0]

{'title': 'Front End UI Developer',
 'url': 'https://pk.linkedin.com/jobs/view/front-end-ui-developer-at-info-resume-edge-gcc-4226293092?position=4&amp;pageNum=0&amp;refId=l1n68mcGur9wgf9Z%2BQALrQ%3D%3D&amp;trackingId=KyekSneG2B69rmkd5UuedQ%3D%3D',
 'company_name': 'Info Resume Edge - GCC ',
 'posted_time': '2 weeks ago',
 'location': 'Lahore, Punjab, Pakistan'}

In [17]:
 
# Run the scraper's duplicate check for the sample record against the combined
# data; the recorded result is False.
# NOTE(review): presumably True means "already present" — the matching rules
# live in WebScraper.check_duplicate and are not visible here; confirm.
data=ws.check_duplicate(tdata[0],cd)
data

False

In [18]:
# cd[(cd["company_name"] == "pak-wheel") ].groupby("main_url").size()
 

In [19]:
# Report whether the directory at `ws.csv_path` currently has any entries.
print("DATA found" if os.listdir(ws.csv_path) else "not data")

DATA found


In [20]:
# Display the combined data again.
cd

Unnamed: 0,main_url,title,url,company_name,posted_time,location
0,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
1,https://www.linkedin.com/jobs/search?keywords=...,React JS Developer,https://pk.linkedin.com/jobs/view/react-js-dev...,PieCyfer,2 months ago,"Lahore, Punjab, Pakistan"
2,https://www.linkedin.com/jobs/search?keywords=...,MERN Stack Developer,https://pk.linkedin.com/jobs/view/mern-stack-d...,Devsinc,3 days ago,"Lahore, Punjab, Pakistan"
3,https://www.linkedin.com/jobs/search?keywords=...,Frontend Engineer I (with Contentful experience),https://pk.linkedin.com/jobs/view/frontend-eng...,Outliant,3 days ago,"Lahore, Punjab, Pakistan"
4,https://www.linkedin.com/jobs/search?keywords=...,Frontend Engineer,https://pk.linkedin.com/jobs/view/frontend-eng...,ClearGrid,1 month ago,"Lahore, Punjab, Pakistan"
...,...,...,...,...,...,...
3557,https://www.linkedin.com/jobs/search?keywords=...,Senior Java Developer,https://pk.linkedin.com/jobs/view/senior-java-...,Soliton Technologies,2 weeks ago,"Lahore, Punjab, Pakistan"
3558,https://www.linkedin.com/jobs/search?keywords=...,Oracle Apex Developer (Onsite Lahore),https://pk.linkedin.com/jobs/view/oracle-apex-...,Biztech Minds,2 weeks ago,"Lahore, Punjab, Pakistan"
3559,https://www.linkedin.com/jobs/search?keywords=...,Oracle Technical Developer Lead Consultant,https://pk.linkedin.com/jobs/view/oracle-techn...,GenZ Talent,1 week ago,"Lahore, Punjab, Pakistan"
3562,https://www.linkedin.com/jobs/search?keywords=...,Full Stack JAVA Developer (FinTech),https://pk.linkedin.com/jobs/view/full-stack-j...,Nakisa,1 day ago,"Lahore, Punjab, Pakistan"


In [21]:
# Pull every row of the combined data titled "Front-End Developer".
# NOTE(review): the recorded output lists the same title and company many
# times, which suggests the combined data still holds repeated postings — confirm.
cd.loc[cd["title"].eq("Front-End Developer")]

Unnamed: 0,main_url,title,url,company_name,posted_time,location
0,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
237,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
437,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
581,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
1094,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
1340,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,1 week ago,"Lahore, Punjab, Pakistan"
2409,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,2 weeks ago,"Lahore, Punjab, Pakistan"
2758,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,2 weeks ago,"Lahore, Punjab, Pakistan"
2897,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,2 weeks ago,"Lahore, Punjab, Pakistan"
3171,https://www.linkedin.com/jobs/search?keywords=...,Front-End Developer,https://pk.linkedin.com/jobs/view/front-end-de...,Spiralyze,2 weeks ago,"Lahore, Punjab, Pakistan"
