# Coming soon, in May 2025

This notebook focuses on the SQL Practice Exams 1, 2, and 3 of the [**365 Data Science**](https://learn.365datascience.com/exams/?tab=practice) platform, including applications that illustrate the usage and the value of SQL procedures and functions.

<br>

From the [365 Data Science Practice Exams](https://365datascience.com/resources-center/practice-exams/) webpage:

> Discover a plethora of online exams that will test your current knowledge and ability to solve data science problems.  
> Evaluate your skills online **at no cost** with SQL mock tests, Excel and NumPy exam questions, and more.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for folder, _, files_in_folder in os.walk('/kaggle/input'):
    for file_name in files_in_folder:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/365ds-practice-exams-people-analytics-dataset/dept_emp.csv
/kaggle/input/365ds-practice-exams-people-analytics-dataset/dept_manager.csv
/kaggle/input/365ds-practice-exams-people-analytics-dataset/employees.csv
/kaggle/input/365ds-practice-exams-people-analytics-dataset/titles.csv
/kaggle/input/365ds-practice-exams-people-analytics-dataset/salaries.csv
/kaggle/input/365ds-practice-exams-people-analytics-dataset/employees.db
/kaggle/input/365ds-practice-exams-people-analytics-dataset/departments.csv


In [2]:
# Names of the tables to load; each one is read from '<name>.csv' in the dataset folder.
table_names = [
    'departments',
    'dept_emp',
    'dept_manager',
    'employees',
    'salaries',
    'titles',
]

# One DataFrame per table, in the same order as table_names
# (dfs[i] holds the table named table_names[i]).
dfs = [
    pd.read_csv('/kaggle/input/365ds-practice-exams-people-analytics-dataset/' + table_name + '.csv')
    for table_name in table_names
]

print('🔎 INSPECTING THE DATASET ##############################################################')

# First pass: dtypes, non-null counts and memory usage of every table.
print('\n\n\n TABLES INFO ++++++++++++++++++++++++++++++++++++++')
for table_name, df in zip(table_names, dfs):
    heading = table_name.upper()
    print('\n\n', heading)
    df.info()

# Second pass: the column names of every table as a plain Python list.
print('\n\n\n COLUMN NAMES +++++++++++++++++++++++++++++++++++++')
for table_name, df in zip(table_names, dfs):
    heading = table_name.upper()
    print('\n\n', heading)
    print([*df.columns])

🔎 INSPECTING THE DATASET ##############################################################



 TABLES INFO ++++++++++++++++++++++++++++++++++++++


 DEPARTMENTS
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   dept_no    9 non-null      object
 1   dept_name  9 non-null      object
dtypes: object(2)
memory usage: 276.0+ bytes


 DEPT_EMP
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 331603 entries, 0 to 331602
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   emp_no     331603 non-null  int64 
 1   dept_no    331603 non-null  object
 2   from_date  331603 non-null  object
 3   to_date    331603 non-null  object
dtypes: int64(1), object(3)
memory usage: 10.1+ MB


 DEPT_MANAGER
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 4 columns):
 #   Colum

In [3]:
# The info() output above shows that the following fields should be cast to a more suitable type:
# Category: ['gender']
# Datetime: ['from_date', 'to_date', 'birth_date', 'hire_date']

category_cols = ['gender']
datetime_cols = ['from_date', 'to_date', 'birth_date', 'hire_date']

print('\n\n\n 🏷️ APPLYING APPROPRIATE DATA TYPES ###################################################### ')
for table_name, df in zip(table_names, dfs):
    # Columns are visited in table order and converted in place on the frames held in dfs;
    # each conversion is reported as "<new dtype> <table> <column>".
    for column_name in df.columns:

        if column_name in category_cols:
            df[column_name] = df[column_name].astype('category')
            print(df[column_name].dtype, table_name, column_name)

        if column_name in datetime_cols:
            # The literal '9999-01-01' is swapped for NaT before the cast.
            # NOTE(review): presumably a placeholder date that would not fit into
            # datetime64[ns] -- confirm against the dataset documentation.
            without_placeholder = df[column_name].replace('9999-01-01', pd.NaT)
            df[column_name] = without_placeholder.astype('datetime64[ns]')
            print(df[column_name].dtype, table_name, column_name)

# Summary statistics of every table, computed after the casts above.
print('\n\n\n TABLES DESCRIBE ++++++++++++++++++++++++++++++++++')
for table_name, df in zip(table_names, dfs):
    summary = df.describe()
    print('\n\n', table_name.upper(), '\n', summary)




 🏷️ APPLYING APPROPRIATE DATA TYPES ###################################################### 
datetime64[ns] dept_emp from_date
datetime64[ns] dept_emp to_date
datetime64[ns] dept_manager from_date
datetime64[ns] dept_manager to_date
datetime64[ns] employees birth_date
category employees gender
datetime64[ns] employees hire_date
datetime64[ns] salaries from_date
datetime64[ns] salaries to_date
datetime64[ns] titles from_date
datetime64[ns] titles to_date



 TABLES DESCRIBE ++++++++++++++++++++++++++++++++++


 DEPARTMENTS 
        dept_no  dept_name
count        9          9
unique       9          9
top       d001  Marketing
freq         1          1


 DEPT_EMP 
               emp_no                      from_date  \
count  331603.000000                         331603   
mean   253332.605025  1993-01-01 23:42:24.762260864   
min     10001.000000            1985-01-01 00:00:00   
25%     85005.500000            1989-02-25 00:00:00   
50%    250001.000000            1993-01-27 00:00: