In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw job-postings dataset into `df`, which the later cells read from.
try:
    df = pd.read_csv('job_descriptions.csv')
except FileNotFoundError as err:
    # Raise instead of print + exit(): exit() is meant for terminal sessions and
    # acts on the kernel session rather than simply failing this cell. An
    # exception halts "Run All" here with a clear traceback and guarantees no
    # later cell runs with `df` undefined.
    raise FileNotFoundError(
        "Error: The file 'job_descriptions.csv' was not found. Please check the file path."
    ) from err

In [3]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,Job Id,Experience,Qualifications,Salary Range,location,Country,latitude,longitude,Work Type,Company Size,...,Contact,Job Title,Role,Job Portal,Job Description,Benefits,skills,Responsibilities,Company,Company Profile
0,1089840000000000.0,5 to 15 Years,M.Tech,$59K-$99K,Douglas,Isle of Man,54.2361,-4.5481,Intern,26801,...,001-381-930-7517x737,Digital Marketing Specialist,Social Media Manager,Snagajob,Social Media Managers oversee an organizations...,"{'Flexible Spending Accounts (FSAs), Relocatio...","Social media platforms (e.g., Facebook, Twitte...","Manage and grow social media accounts, create ...",Icahn Enterprises,"{""Sector"":""Diversified"",""Industry"":""Diversifie..."
1,398454000000000.0,2 to 12 Years,BCA,$56K-$116K,Ashgabat,Turkmenistan,38.9697,59.5563,Intern,100340,...,461-509-4216,Web Developer,Frontend Web Developer,Idealist,Frontend Web Developers design and implement u...,"{'Health Insurance, Retirement Plans, Paid Tim...","HTML, CSS, JavaScript Frontend frameworks (e.g...","Design and code user interfaces for websites, ...",PNC Financial Services Group,"{""Sector"":""Financial Services"",""Industry"":""Com..."
2,481640000000000.0,0 to 12 Years,PhD,$61K-$104K,Macao,"Macao SAR, China",22.1987,113.5439,Temporary,84525,...,9687619505,Operations Manager,Quality Control Manager,Jobs2Careers,Quality Control Managers establish and enforce...,"{'Legal Assistance, Bonuses and Incentive Prog...",Quality control processes and methodologies St...,Establish and enforce quality control standard...,United Services Automobile Assn.,"{""Sector"":""Insurance"",""Industry"":""Insurance: P..."
3,688193000000000.0,4 to 11 Years,PhD,$65K-$91K,Porto-Novo,Benin,9.3077,2.3158,Full-Time,129896,...,+1-820-643-5431x47576,Network Engineer,Wireless Network Engineer,FlexJobs,"Wireless Network Engineers design, implement, ...","{'Transportation Benefits, Professional Develo...",Wireless network design and architecture Wi-Fi...,"Design, configure, and optimize wireless netwo...",Hess,"{""Sector"":""Energy"",""Industry"":""Mining, Crude-O..."
4,117058000000000.0,1 to 12 Years,MBA,$64K-$87K,Santiago,Chile,-35.6751,-71.5429,Intern,53944,...,343.975.4702x9340,Event Manager,Conference Manager,Jobs2Careers,A Conference Manager coordinates and manages c...,"{'Flexible Spending Accounts (FSAs), Relocatio...",Event planning Conference logistics Budget man...,Specialize in conference and convention planni...,Cairn Energy,"{""Sector"":""Energy"",""Industry"":""Energy - Oil & ..."


In [4]:
# --- Normalise the dtypes that the filters below rely on ---

# Latitude values that cannot be parsed as numbers become NaN; those rows are
# then removed. Done in place so `df` itself remains the cleaned frame.
df['latitude'] = pd.to_numeric(df['latitude'], errors='coerce')
df.dropna(subset=['latitude'], inplace=True)

# Unparseable posting dates become NaT. NaT fails both date comparisons below,
# so such rows are excluded by the date-window filter.
# NOTE(review): the recorded run appears to have emitted a warning for this
# call; consider passing an explicit `format=` once the column's date format
# has been confirmed.
df['Job Posting Date'] = pd.to_datetime(df['Job Posting Date'], errors='coerce')

# --- Apply all the filtering conditions ---

# Keep only the two target job titles (exact match).
df_filtered = df[df['Job Title'].isin(['Data Engineer', 'Data Scientist'])]

# Exclude Asian countries and countries whose name starts with 'C'.
# NOTE(review): `asian_countries` is a partial, exact-match list. The data
# preview shows values such as 'Turkmenistan' and 'Macao SAR, China' that it
# does not cover - confirm the intended set against the dataset's country names.
asian_countries = ['China', 'India', 'Japan', 'South Korea', 'Singapore', 'Malaysia', 'Thailand', 'Indonesia', 'Vietnam', 'Philippines']
# Built from the full frame; the isinstance check skips missing (NaN) countries.
countries_starting_with_C = [country for country in df['Country'].unique() if isinstance(country, str) and country.startswith('C')]
df_filtered = df_filtered[
    (~df_filtered['Country'].isin(asian_countries)) &
    (~df_filtered['Country'].isin(countries_starting_with_C))
]

# Filter for Preference 'Female'
df_filtered = df_filtered[df_filtered['Preference'] == 'Female']

# Filter for Job Posting Date (both bounds inclusive)
start_date = pd.to_datetime('2023-01-01')
end_date = pd.to_datetime('2023-06-01')
df_filtered = df_filtered[
    (df_filtered['Job Posting Date'] >= start_date) &
    (df_filtered['Job Posting Date'] <= end_date)
]

# Filter for Qualification 'B.Tech'.
# regex=False makes this a literal, case-insensitive substring match; with the
# default regex=True the '.' would match any character, not just a dot.
df_filtered = df_filtered[
    df_filtered['Qualifications'].str.contains('B.Tech', case=False, na=False, regex=False)
]

# Filter for Latitude below 10
df_filtered = df_filtered[df_filtered['latitude'] < 10]

# --- Persist the result and report how many rows matched ---
output_file = 'task3_filtered_data.csv'
df_filtered.to_csv(output_file, index=False)

print(f"Filtered data for Task 3 has been saved to {output_file}")
print(f"Number of rows in output file: {len(df_filtered)}")

  df['Job Posting Date'] = pd.to_datetime(df['Job Posting Date'], errors='coerce')


Filtered data for Task 3 has been saved to task3_filtered_data.csv
Number of rows in output file: 23
