# Create table

***date dimension***

In [17]:
import pandas as pd

# Read the combined source data that the date dimension is derived from
data = pd.read_csv('final_result.csv')

# Build one row per distinct raw date value. dateID is a 1-based surrogate key
# in order of first appearance; month (full name) and year are derived from
# the parsed date. Later keyword arguments see the columns set by earlier ones.
date_dimension = (
    data[['date']]
    .drop_duplicates()
    .assign(
        dateID=lambda frame: range(1, len(frame) + 1),
        date=lambda frame: pd.to_datetime(frame['date']),
        month=lambda frame: frame['date'].dt.strftime('%B'),
        year=lambda frame: frame['date'].dt.year,
    )
)

# Put the key first and keep only the dimension's columns
column_order = ['dateID', 'date', 'month', 'year']
date_dimension = date_dimension[column_order]

# Persist the date dimension without the pandas index
date_dimension.to_csv('date_dimension.csv', index=False)


***province dimension***

In [18]:
import pandas as pd

# Read the combined source data
data = pd.read_csv('final_result.csv')

# The province dimension is every distinct (province, prname) pair
province_columns = ['province', 'prname']
province_dimension = data.loc[:, province_columns].drop_duplicates()

# Persist the province dimension without the pandas index
province_dimension.to_csv('province_dimension.csv', index=False)


***Vaccination dimension***

In [19]:
import pandas as pd

# Load the combined source data
data = pd.read_csv('final_result.csv')

# Keep only the keys and the vaccination measures. `province` is already the
# key of the province dimension, so no join against province_dimension.csv is
# needed: that join only added `prname`, which was dropped again, and it could
# multiply rows if a province code appeared there more than once.
vaccination_dimension = data[['date', 'province', 'partial', 'fully', 'booster']].copy()

# Load the date dimension to look up the dateID foreign key
date_dimension = pd.read_csv('date_dimension.csv')

# date_dimension.csv holds dates that were parsed and re-serialised, so parse
# both sides before joining. Comparing the raw strings would leave dateID
# empty whenever the source file uses a different date format.
vaccination_dimension['date'] = pd.to_datetime(vaccination_dimension['date'])
date_dimension['date'] = pd.to_datetime(date_dimension['date'])

# One dateID per calendar date keeps the left join from multiplying rows
date_lookup = date_dimension[['date', 'dateID']].drop_duplicates(subset='date')
vaccination_dimension = vaccination_dimension.merge(date_lookup, how='left', on='date')

# Remove duplicate rows BEFORE assigning vacID: once every row carries a
# unique ID, no two rows are equal and drop_duplicates() removes nothing.
vaccination_dimension = vaccination_dimension[
    ['dateID', 'province', 'partial', 'fully', 'booster']
].drop_duplicates()

# 1-based surrogate key, placed as the first column
vaccination_dimension.insert(0, 'vacID', range(1, len(vaccination_dimension) + 1))

# Save the Vaccination Dimension to a new CSV file
vaccination_dimension.to_csv('vaccination_dimension.csv', index=False)


***Covid19 Metric dimension***

In [20]:
import pandas as pd

# Load the combined source data
data = pd.read_csv('final_result.csv')

# Keep only the keys and the COVID-19 measures
covid_dimension = data[['date', 'province', 'totalcases', 'numdeaths', 'recovery']].copy()

# Load the date dimension to look up the dateID foreign key
date_dimension = pd.read_csv('date_dimension.csv')

# date_dimension.csv holds dates that were parsed and re-serialised, so parse
# both sides before joining. Comparing the raw strings would leave dateID
# empty whenever the source file uses a different date format.
covid_dimension['date'] = pd.to_datetime(covid_dimension['date'])
date_dimension['date'] = pd.to_datetime(date_dimension['date'])

# One dateID per calendar date keeps the left join from multiplying rows
date_lookup = date_dimension[['date', 'dateID']].drop_duplicates(subset='date')
covid_dimension = covid_dimension.merge(date_lookup, how='left', on='date')

# Remove duplicate rows BEFORE assigning covidID: once every row carries a
# unique ID, no two rows are equal and drop_duplicates() removes nothing.
covid_dimension = covid_dimension[
    ['dateID', 'province', 'totalcases', 'numdeaths', 'recovery']
].drop_duplicates()

# 1-based surrogate key, placed as the first column
covid_dimension.insert(0, 'covidID', range(1, len(covid_dimension) + 1))

# Save the COVID19 Metrics Dimension to a new CSV file
covid_dimension.to_csv('covid19_metrics_dimension.csv', index=False)


# Generate SQL (CREATE TABLE and INSERT statements)

date dimension

In [21]:
import csv

# Define your CSV file path
csv_file_path = 'date_dimension.csv'

create_table_statement = """
CREATE TABLE DateDimension (
    dateID INTEGER PRIMARY KEY,
    date DATE,
    month VARCHAR(20),
    year INTEGER
);
"""


def _sql_literal(value):
    """Return one CSV field as a SQL literal.

    An empty field (how pandas writes a missing value) becomes NULL. Any
    other value becomes a single-quoted string with embedded single quotes
    doubled, so a quote inside the data cannot terminate the literal.
    """
    if value == '':
        return 'NULL'
    return "'" + value.replace("'", "''") + "'"


# Read every CSV row (header included) as a tuple of strings.
# newline='' is what the csv module expects of the file object; UTF-8 is the
# encoding pandas uses by default when it writes a CSV.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file:
    data_list = [tuple(row) for row in csv.reader(csv_file)]

# Generate one INSERT per data row: skip the header (first row) and any
# blank line, which would otherwise yield an empty VALUES () clause.
insert_statements = []
for row in data_list[1:]:
    if not row:
        continue
    values = ', '.join(_sql_literal(value) for value in row)
    insert_statements.append(f"INSERT INTO DateDimension VALUES ({values});")

# Write SQL statements to a file ('w' starts the script afresh)
with open('dimension_creating.sql', 'w', encoding='utf-8') as f:
    f.write(create_table_statement + '\n')
    f.write('\n'.join(insert_statements))

province dimension

In [22]:
import csv

# Define your CSV file path
csv_file_path = 'province_dimension.csv'

create_table_statement = """
CREATE TABLE ProvinceDimension (
    province VARCHAR(2) PRIMARY KEY,
    prname VARCHAR(50)
);
"""


def _sql_literal(value):
    """Return one CSV field as a SQL literal.

    An empty field (how pandas writes a missing value) becomes NULL. Any
    other value becomes a single-quoted string with embedded single quotes
    doubled, so a quote inside the data cannot terminate the literal.
    """
    if value == '':
        return 'NULL'
    return "'" + value.replace("'", "''") + "'"


# Read every CSV row (header included) as a tuple of strings.
# newline='' is what the csv module expects of the file object; UTF-8 is the
# encoding pandas uses by default when it writes a CSV, and reading with it
# keeps accented province names intact regardless of the platform default.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file:
    data_list = [tuple(row) for row in csv.reader(csv_file)]

# Generate one INSERT per data row: skip the header (first row) and any
# blank line, which would otherwise yield an empty VALUES () clause.
insert_statements = []
for row in data_list[1:]:
    if not row:
        continue
    values = ', '.join(_sql_literal(value) for value in row)
    insert_statements.append(f"INSERT INTO ProvinceDimension VALUES ({values});")

# Append to the SQL script; the leading newline separates this table's
# statements from whatever the file already contains.
with open('dimension_creating.sql', 'a', encoding='utf-8') as f:
    f.write('\n')
    f.write(create_table_statement + '\n')
    f.write('\n'.join(insert_statements))

vaccination dimension

In [23]:
import csv

# Define your CSV file path
csv_file_path = 'vaccination_dimension.csv'

# NOTE(review): ProvinceDimension declares province as VARCHAR(2) while this
# table declares the referencing column as VARCHAR(50) - confirm which width
# is intended.
create_table_statement = """
CREATE TABLE VaccinationDimension (
    vacID INTEGER PRIMARY KEY,
    dateID INTEGER,
    province VARCHAR(50),
    partial INTEGER,
    fully INTEGER,
    booster INTEGER,
    FOREIGN KEY (dateID) REFERENCES DateDimension(dateID),
    FOREIGN KEY (province) REFERENCES ProvinceDimension(province)
);
"""


def _sql_literal(value):
    """Return one CSV field as a SQL literal.

    An empty field (how pandas writes a missing value) becomes NULL rather
    than '', which is not a valid value for the INTEGER columns. Any other
    value becomes a single-quoted string with embedded single quotes
    doubled, so a quote inside the data cannot terminate the literal.
    """
    if value == '':
        return 'NULL'
    return "'" + value.replace("'", "''") + "'"


# Read every CSV row (header included) as a tuple of strings.
# newline='' is what the csv module expects of the file object; UTF-8 is the
# encoding pandas uses by default when it writes a CSV.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file:
    data_list = [tuple(row) for row in csv.reader(csv_file)]

# Generate one INSERT per data row: skip the header (first row) and any
# blank line, which would otherwise yield an empty VALUES () clause.
insert_statements = []
for row in data_list[1:]:
    if not row:
        continue
    values = ', '.join(_sql_literal(value) for value in row)
    insert_statements.append(f"INSERT INTO VaccinationDimension VALUES ({values});")

# Append to the SQL script; the leading newline separates this table's
# statements from whatever the file already contains.
with open('dimension_creating.sql', 'a', encoding='utf-8') as f:
    f.write('\n')
    f.write(create_table_statement + '\n')
    f.write('\n'.join(insert_statements))

Covid Metric Dimension

In [24]:
import csv

# Define your CSV file path
csv_file_path = 'covid19_metrics_dimension.csv'

# NOTE(review): ProvinceDimension declares province as VARCHAR(2) while this
# table declares the referencing column as VARCHAR(50) - confirm which width
# is intended.
create_table_statement = """
CREATE TABLE COVID19MetricsDimension (
    covidID INTEGER PRIMARY KEY,
    dateID INTEGER,
    province VARCHAR(50),
    totalCases INTEGER,
    numDeaths INTEGER,
    recovery INTEGER,
    FOREIGN KEY (dateID) REFERENCES DateDimension(dateID),
    FOREIGN KEY (province) REFERENCES ProvinceDimension(province)
);
"""


def _sql_literal(value):
    """Return one CSV field as a SQL literal.

    An empty field (how pandas writes a missing value) becomes NULL rather
    than '', which is not a valid value for the INTEGER columns. Any other
    value becomes a single-quoted string with embedded single quotes
    doubled, so a quote inside the data cannot terminate the literal.
    """
    if value == '':
        return 'NULL'
    return "'" + value.replace("'", "''") + "'"


# Read every CSV row (header included) as a tuple of strings.
# newline='' is what the csv module expects of the file object; UTF-8 is the
# encoding pandas uses by default when it writes a CSV.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file:
    data_list = [tuple(row) for row in csv.reader(csv_file)]

# Generate one INSERT per data row: skip the header (first row) and any
# blank line, which would otherwise yield an empty VALUES () clause.
insert_statements = []
for row in data_list[1:]:
    if not row:
        continue
    values = ', '.join(_sql_literal(value) for value in row)
    insert_statements.append(f"INSERT INTO COVID19MetricsDimension VALUES ({values});")

# Append to the SQL script; the leading newline separates this table's
# statements from whatever the file already contains.
with open('dimension_creating.sql', 'a', encoding='utf-8') as f:
    f.write('\n')
    f.write(create_table_statement + '\n')
    f.write('\n'.join(insert_statements))