# **Setup**

In [21]:
import os
import regex as re
import pandas as pd
from sqlalchemy import create_engine

In [24]:
# Relative path of the folder that is listed for input files and from which
# the CSV files are read.
Folder_Input = 'Input Data'

# **Create Database Connection**

In [6]:
# Connection settings are read from environment variables so that credentials
# are never stored in (or committed with) the notebook. The non-secret
# settings fall back to local-development defaults.
PGSQL_Server = os.environ.get('PGSQL_SERVER', 'localhost:5432')   # host:port
PGSQL_Username = os.environ.get('PGSQL_USERNAME', 'postgres')
PGSQL_Database = os.environ.get('PGSQL_DATABASE', 'paintings')

# The password deliberately has no default: fail early with a clear message
# instead of attempting a connection with a missing credential.
PGSQL_Password = os.environ.get('PGSQL_PASSWORD')
if PGSQL_Password is None:
    raise RuntimeError('Set the PGSQL_PASSWORD environment variable before running this notebook.')

# NOTE(review): a password containing URL-reserved characters (e.g. '@' or '/')
# would likely need URL-encoding before being embedded here - confirm.
connection_string = f'postgresql://{PGSQL_Username}:{PGSQL_Password}@{PGSQL_Server}/{PGSQL_Database}'
Query_Engine = create_engine(connection_string)

# Single connection object that the later cells pass to pandas as `con`.
Connection = Query_Engine.connect()

# **Read Data Files**

In [25]:
# Names of all entries in the input folder, in sorted order.
Files_List = sorted(os.listdir(Folder_Input))

# Report how many entries were found, followed by the entries themselves.
print('No. Of Files Found:', len(Files_List))
print(Files_List)

No. Of Files Found: 8
['artist.csv', 'canvas_size.csv', 'image_link.csv', 'museum.csv', 'museum_hours.csv', 'product_size.csv', 'subject.csv', 'work.csv']


# **Filter CSV Filenames**

In [31]:
# Keep only the entries whose name ends in ".csv" (case-insensitive).
# A suffix test is used instead of a substring test so that names which merely
# contain ".csv" somewhere (e.g. "work.csv.bak") are not treated as CSV files.
CSV_Files_List = [filename for filename in Files_List if filename.lower().endswith('.csv')]

print('No. Of CSV Files:', len(CSV_Files_List))
print(CSV_Files_List)

No. Of CSV Files: 8
['artist.csv', 'canvas_size.csv', 'image_link.csv', 'museum.csv', 'museum_hours.csv', 'product_size.csv', 'subject.csv', 'work.csv']


# **Extract SQL Table Names**

In [33]:
# METHOD 01 - Extract Table Name
# The table name is whatever precedes the first "." in the filename.
for filename in CSV_Files_List:
    table_name = filename.partition('.')[0]
    print(table_name)

artist
canvas_size
image_link
museum
museum_hours
product_size
subject
work


In [42]:
# METHOD 02 - Extract Table Name
# Capture the run of word characters immediately before a trailing ".csv".
# The pattern is a raw string so that "\w" and "\." reach the regex engine as
# written instead of being parsed as (invalid) string escapes, and matching
# ignores case to agree with the case-insensitive ".csv" filter that built
# CSV_Files_List.
for filename in CSV_Files_List:
    table_name = re.findall(r'(\w+)\.csv$',filename,flags=re.I)[0]
    print(table_name)

artist
canvas_size
image_link
museum
museum_hours
product_size
subject
work


# **Read & Load To SQL Database**

In [62]:
# Load Data To SQL Database
# Each CSV file is read into a DataFrame and written to a table named after
# the file.
from IPython.display import clear_output

# Iterate the filtered list built earlier in the notebook (CSV_Files_List);
# the previous name, CSV_Files, is never defined and fails on a fresh kernel.
for filename in CSV_Files_List:
    # Keep only the current file's progress line and preview visible.
    clear_output(wait=True)
    print('Reading & Loading: ',filename)

    df = pd.read_csv(f'{Folder_Input}/{filename}')
    display(df.head(10))

    # Table name = word characters immediately before the trailing ".csv".
    # Raw-string pattern avoids invalid string escapes; re.I keeps this in
    # step with the case-insensitive ".csv" filter.
    table_name = re.findall(r'(\w+)\.csv$',filename,flags=re.I)[0]

    # if_exists='replace' overwrites a table of the same name, so re-running
    # this cell reloads the data; index=False keeps the DataFrame index out
    # of the table.
    df.to_sql(name=table_name,con=Connection,if_exists='replace',index=False)

Reading & Loading:  work.csv


Unnamed: 0,work_id,name,artist_id,style,museum_id
0,160228,Still Life with Flowers and a Watch,615,Baroque,43.0
1,160236,Still Life with Fruit and a Beaker on a Cock's...,615,Baroque,43.0
2,160244,Still Life with Fruit and a Goldfinch,615,Baroque,43.0
3,160252,Still Life with Fruit and Oysters,615,Baroque,43.0
4,160260,"Still Life with Fruit, Oysters, and a Porcelai...",615,Baroque,43.0
5,160268,The Overturned Bouquet,615,Baroque,43.0
6,125752,Arabian Horses at Pasture,757,Baroque,
7,125818,Count Halm on His Basedow Estate,757,Baroque,
8,23448,Horses at the Porch,757,Baroque,34.0
9,125763,Napoleon Before the Burning City of Smolensk,757,Baroque,


In [61]:
# Read the artist CSV on its own and show the dtype pandas inferred per column.
artist_csv_path = f'{Folder_Input}/artist.csv'
df = pd.read_csv(artist_csv_path)
df.dtypes

artist_id        int64
full_name       object
first_name      object
middle_names    object
last_name       object
nationality     object
style           object
birth            int64
death            int64
dtype: object

# **Read SQL Database Table**

In [57]:
# Read the "artist" table back from the database to inspect what was loaded.
# No `columns` argument is passed: every column of the table is wanted, and
# the previous empty list did not restrict the selection anyway.
df = pd.read_sql_table(table_name='artist',con=Connection)
print(df.shape)
df.head()

(421, 9)


Unnamed: 0,artist_id,full_name,first_name,middle_names,last_name,nationality,style,birth,death
0,500,Pierre-Auguste Renoir,Pierre,Auguste,Renoir,French,Impressionist,0 days 00:30:41,1919
1,501,Alexandre Cabanel,Alexandre,,Cabanel,French,Classicist,0 days 00:30:23,1889
2,502,James Ensor,James,,Ensor,Belgian,Expressionist,0 days 00:31:00,1949
3,503,Maximilien Luce,Maximilien,,Luce,French,Pointillist,0 days 00:30:58,1941
4,504,August Macke,August,,Macke,German,Expressionist,0 days 00:31:27,1914
