In [76]:
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3

# Read the cleaned degree-jobs CSV, then print its column names, dtypes and non-null counts
degree_jobs_cleaned = pd.read_csv(filepath_or_buffer="degree_jobs_cleaned.csv")
degree_jobs_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 759 entries, 0 to 758
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   job_id             759 non-null    int64 
 1   job_title          759 non-null    object
 2   salary_usd         759 non-null    int64 
 3   employment_type    759 non-null    object
 4   company_location   759 non-null    object
 5   is_remote          759 non-null    bool  
 6   employee_location  759 non-null    object
 7   job_skills         759 non-null    object
 8   degree_required    759 non-null    object
 9   posting_date       759 non-null    object
 10  company_name       759 non-null    object
 11  has_target         759 non-null    bool  
 12  salary_tier        759 non-null    object
 13  degree_flag        759 non-null    object
dtypes: bool(2), int64(2), object(10)
memory usage: 72.8+ KB


In [77]:
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3

# Read the cleaned no-degree-jobs CSV, then print its column names, dtypes and non-null counts
no_degree_jobs_cleaned = pd.read_csv(filepath_or_buffer="no_degree_jobs_cleaned.csv")
no_degree_jobs_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1184 entries, 0 to 1183
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   job_id             1184 non-null   int64 
 1   job_title          1184 non-null   object
 2   salary_usd         1184 non-null   int64 
 3   employment_type    1184 non-null   object
 4   company_location   1184 non-null   object
 5   is_remote          1184 non-null   bool  
 6   employee_location  1184 non-null   object
 7   job_skills         1184 non-null   object
 8   degree_required    1184 non-null   bool  
 9   posting_date       1184 non-null   object
 10  company_name       1184 non-null   object
 11  salary_tier        1184 non-null   object
 12  has_target         1184 non-null   bool  
 13  degree_flag        1184 non-null   object
dtypes: bool(3), int64(2), object(9)
memory usage: 105.3+ KB


In [78]:
# Frequency of each raw employment_type value in the degree-jobs table
degree_jobs_cleaned.loc[:, 'employment_type'].value_counts()

employment_type
PT    198
CT    190
FT    186
FL    185
Name: count, dtype: int64

In [79]:
# Frequency of each raw employment_type value in the no-degree-jobs table
no_degree_jobs_cleaned.loc[:, 'employment_type'].value_counts()

employment_type
Full-time                   1089
Contractor                    62
Full-time and Part-time       15
Part-time                      5
Full-time and Temp work        4
Temp work                      4
Full-time and Per diem         2
Full-time and Contractor       2
Contractor and Temp work       1
Name: count, dtype: int64

In [80]:
# Expand the employment_type abbreviations in degree_jobs_cleaned to the full-length
# labels that no_degree_jobs_cleaned already uses, so the two tables share a vocabulary.
mapping = {
    "FT": "Full-time",
    # NOTE(review): "FL" is folded into "Full-time", the same target as "FT". If the
    # source data uses FL for freelance, this merges two distinct categories — confirm.
    "FL": "Full-time",
    "PT": "Part-time",
    "CT": "Contractor"
}
# Use replace() rather than map(): map() converts every value that is not a key of the
# dict into NaN, so re-running this cell once the column already holds the full labels
# would blank the entire column. replace() leaves unmatched values untouched, which
# makes the cell safe to re-run.
degree_jobs_cleaned['employment_type'] = degree_jobs_cleaned['employment_type'].replace(mapping)

In [81]:
# Postings that list two employment types are reduced to a single primary category
# so every row carries exactly one label.
collapse_map = dict.fromkeys(
    [
        "Full-time and Part-time",
        "Full-time and Temp work",
        "Full-time and Per diem",
        "Full-time and Contractor",
    ],
    "Full-time",
)
collapse_map["Contractor and Temp work"] = "Contractor"

# Values that are not keys of collapse_map pass through replace() unchanged
no_degree_jobs_cleaned['employment_type'] = (
    no_degree_jobs_cleaned['employment_type'].replace(collapse_map)
)

In [82]:
# Show the distinct employment_type labels left in the degree-jobs table after the mapping step
print(pd.unique(degree_jobs_cleaned['employment_type']))

['Contractor' 'Part-time' 'Full-time']


In [83]:
# Show the distinct employment_type labels left in the no-degree-jobs table after collapsing mixed types
print(pd.unique(no_degree_jobs_cleaned['employment_type']))

['Full-time' 'Contractor' 'Part-time' 'Temp work']


In [84]:
# Full set of employment-type labels present across the two tables after normalisation
categories = [
    'Full-time',
    'Part-time',
    'Contractor',
    'Temp work',
]

In [85]:
# Print the column index of degree_jobs_cleaned to check the names before writing it to SQLite
print(degree_jobs_cleaned.columns)

Index(['job_id', 'job_title', 'salary_usd', 'employment_type',
       'company_location', 'is_remote', 'employee_location', 'job_skills',
       'degree_required', 'posting_date', 'company_name', 'has_target',
       'salary_tier', 'degree_flag'],
      dtype='object')


In [86]:
# Open the SQLite database file (sqlite3 creates it if it does not exist yet)
conn = sqlite3.connect(database="data_analyst_jobs.db")
# NOTE(review): the cursor is not used by the cells visible in this section — confirm it is needed later
cursor = conn.cursor()

In [87]:
# Write the degree-jobs frame to the degree_jobs table, overwriting any existing table and omitting the index
degree_jobs_cleaned.to_sql(
    name="degree_jobs",
    con=conn,
    if_exists="replace",
    index=False,
)

759

In [89]:
# Write the no-degree-jobs frame to the no_degree_jobs table, overwriting any existing table and omitting the index
no_degree_jobs_cleaned.to_sql(
    name="no_degree_jobs",
    con=conn,
    if_exists="replace",
    index=False,
)

1184

In [90]:
# Query sqlite_master for the names of all tables in the database and print them
print(
    pd.read_sql_query(
        sql="SELECT name FROM sqlite_master WHERE type='table';",
        con=conn,
    )
)

             name
0     degree_jobs
1  no_degree_jobs


In [91]:
# Print the column names and declared SQLite types of the degree_jobs table
print(
    pd.read_sql_query(
        sql="PRAGMA table_info(degree_jobs);",
        con=conn,
    )
)

    cid               name     type  notnull dflt_value  pk
0     0             job_id  INTEGER        0       None   0
1     1          job_title     TEXT        0       None   0
2     2         salary_usd  INTEGER        0       None   0
3     3    employment_type     TEXT        0       None   0
4     4   company_location     TEXT        0       None   0
5     5          is_remote  INTEGER        0       None   0
6     6  employee_location     TEXT        0       None   0
7     7         job_skills     TEXT        0       None   0
8     8    degree_required     TEXT        0       None   0
9     9       posting_date     TEXT        0       None   0
10   10       company_name     TEXT        0       None   0
11   11         has_target  INTEGER        0       None   0
12   12        salary_tier     TEXT        0       None   0
13   13        degree_flag     TEXT        0       None   0


In [92]:
# Print the column names and declared SQLite types of the no_degree_jobs table
print(
    pd.read_sql_query(
        sql="PRAGMA table_info(no_degree_jobs);",
        con=conn,
    )
)

    cid               name     type  notnull dflt_value  pk
0     0             job_id  INTEGER        0       None   0
1     1          job_title     TEXT        0       None   0
2     2         salary_usd  INTEGER        0       None   0
3     3    employment_type     TEXT        0       None   0
4     4   company_location     TEXT        0       None   0
5     5          is_remote  INTEGER        0       None   0
6     6  employee_location     TEXT        0       None   0
7     7         job_skills     TEXT        0       None   0
8     8    degree_required  INTEGER        0       None   0
9     9       posting_date     TEXT        0       None   0
10   10       company_name     TEXT        0       None   0
11   11        salary_tier     TEXT        0       None   0
12   12         has_target  INTEGER        0       None   0
13   13        degree_flag     TEXT        0       None   0
