# Used code to collect data

In [5]:
import pandas as pd
import pyodbc
import os
import re
from tkinter import simpledialog

batch_no = simpledialog.askinteger (title= "Batch No", prompt= "Please enter the batch no:")

mdb_folders_path = r"C:\Users\Khaled Ahmed\Desktop\fwd_analysis_ASH\011_Folder_to_collect_batch"
mdb_files_list = os.listdir(mdb_folders_path)
list_files_not_matching = []
pattern = r"H(\d+)-(M|R|m|r)-L(\d)-(\d+)-(\d+)"
main_files_dict = {}
summary_table_list = []
comments_table_list = []
drops_table_list = []
histories_table_list = []
remarks_table_list = []
sessions_table_list = []
stations_table_list = []
timings_table_list = []
transducers_table_list = []
version_table_list = []


for file in mdb_files_list:
    # Check if the file matches the pattern or not:
    try:
        # Extract data from the filename:
        name_text = re.search(pattern, file)
        if not name_text:
            list_files_not_matching.append(file)
            continue
        
        file_name = file
        road_no = name_text.group(1)
        dir_cd = name_text.group(2)
        lane_no = name_text.group(3)
        start_kp = name_text.group(4)
        end_kp = name_text.group(5)
        
        # Location of the access file
        mdb_file = os.path.join(mdb_folders_path, file)
        
        # Create a connection string
        conn_string = (
            r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'
            r'DBQ=' + mdb_file + ';'
        )
        
        # The tables
        table_name_list = ['Sessions', 'Comments', 'Drops', 'Histories', 'Remarks', 'Stations', 'Timings', 'Transducers', 'Version']
        df_access_dict = {}
        
        # Create connection
        with pyodbc.connect(conn_string) as connection:
            cursor = connection.cursor()
            try:
                for table in table_name_list:
                    query = f'SELECT * FROM {table}'
                    df = pd.read_sql(query, connection)
                    df_access_dict[table] = df

                # Add the GUID value to all tables 
                guid_value = df_access_dict['Sessions']['GUID'][0]
                table_add_guid = ['Comments', 'Drops', 'Histories', 'Remarks', 'Stations', 'Timings', 'Transducers', 'Version']
                for table2 in table_add_guid:
                    df_access_dict[table2]['GUID'] = guid_value

                # Check the drops and the temperature in the Drops table:
                df = df_access_dict['Drops']
                for index in df.index:
                    if df.loc[index, 'D9'] < 0.5:
                        df.loc[index, 'D7'] = 0.8 * df.loc[index, 'D6']
                        df.loc[index, 'D8'] = 0.7 * df.loc[index, 'D7']
                        df.loc[index, 'D9'] = 0.6 * df.loc[index, 'D8']

                avg_asphalt = df[df['Asphalt'] > 0]['Asphalt'].mean()
                avg_surface = df[df['Asphalt'] > 0]['Surface'].mean()
                perct = avg_asphalt / avg_surface
                for index in df.index:
                    if df.loc[index, 'Asphalt'] < 0:
                        df.loc[index, 'Asphalt'] = perct * df.loc[index, 'Surface']
                df_access_dict['Drops'] = df

            finally:
                cursor.close()

        # Make a summary table for the data
        df_summary_table = pd.DataFrame(columns=["file_name", "road_no", "dir_cd", "lane_no", "start_kp", "end_kp", "batch_no", "guid"])
        new_row_values = {
            "file_name": file_name,
            "road_no": road_no,
            "dir_cd": dir_cd,
            "lane_no": lane_no,
            "start_kp": start_kp,
            "end_kp": end_kp,
            "batch_no": batch_no,
            "guid": guid_value
        }
        df_summary_table.loc[0] = new_row_values
        
        # Update the main dictionary that contains the data of all files of the batch 
        sub = {
            file: {
                "road_no": road_no,
                "dir_cd": dir_cd,
                "lane_no": lane_no,
                "start_kp": start_kp,
                "end_kp": end_kp,
                "batch_no": batch_no,
                "GUID": guid_value,
                "summary_table": df_summary_table,
                "df_access_dict": df_access_dict
            }
        }
        
        
        main_files_dict.update(sub)
        
        # ['Sessions', 'Comments', 'Drops', 'Histories', 'Remarks', 'Stations', 'Timings', 'Transducers', 'Version']
        summary_table_list.append (df_summary_table)
        comments_table_list.append (df_access_dict['Comments'])
        drops_table_list.append (df_access_dict['Drops'])
        # histories_table_list.append (df_access_dict['Histories'])
        remarks_table_list.append (df_access_dict['Remarks'])
        sessions_table_list.append (df_access_dict['Sessions'])
        stations_table_list.append (df_access_dict['Stations'])
        timings_table_list.append (df_access_dict['Timings'])
        transducers_table_list.append (df_access_dict['Transducers'])
        version_table_list.append (df_access_dict['Version'])

    except Exception as e:
        print(f"Error processing file {file}: {e}")
        list_files_not_matching.append(file)

# Print summary of unmatched files
print(f"Files not matching pattern: {list_files_not_matching}")

df_summary = pd.concat(summary_table_list, axis = 0)
df_comments = pd.concat(comments_table_list, axis = 0)
df_drops = pd.concat(drops_table_list, axis = 0)
# df_histories = pd.concat(histories_table_list, axis = 0)
df_remarks = pd.concat(remarks_table_list, axis = 0)
df_sessions = pd.concat(sessions_table_list, axis = 0)
df_stations = pd.concat(stations_table_list, axis = 0)
df_timings = pd.concat(timings_table_list, axis = 0)
df_transducers = pd.concat(transducers_table_list, axis = 0)
df_version = pd.concat(version_table_list, axis = 0)

  df = pd.read_sql(query, connection)
  df = pd.read_sql(query, connection)


Files not matching pattern: []


  df = pd.read_sql(query, connection)
  df = pd.read_sql(query, connection)


# Used code to upload data to database

In [7]:
#To upload the data to the database 
from sqlalchemy import create_engine
user = 'postgres'
password = 'mjesbb51187'
database = 'elmod_db'
host = 'localhost'
port = '5432'

engine = create_engine (f'postgresql://{user}:{password}@{host}:{port}/{database}')
data_for_postgres = {"summary_table":df_summary,
                     "comments": df_comments,
                     "drops":df_drops,
                     # "histories":df_histories,
                     "remarks":df_remarks,
                     "sessions":df_sessions,
                     "stations":df_stations,
                     "timings": df_timings,
                     "transducers":df_transducers,
                     "version":df_version}

with engine.connect() as connection:
    for table_name,df_total in data_for_postgres.items():
        df_total.to_sql (table_name, connection , if_exists = 'append', index = False)

# BELOW HERE IS DRAFT 

# Old code: check the problems

In [None]:
# import pandas as pd
# import pyodbc
# from sqlalchemy import create_engine
# import os, re

# batch_no = 16
# mdb_folders_path = r"C:\Users\Khaled Ahmed\Desktop\fwd_analysis_ASH\009_data_for_ASH_program\trial_batch_2"
# mdb_files_list = os.listdir(mdb_folders_path)
# list_files_not_matching = []
# pattern = r"H(\d+)-(M|R|m|r)-L(\d)-(\d+)-(\d+)"
# main_files_dict = {}

# for file in mdb_files_list:
#     #check file mathcing the pattern or not:
#     try:
#         #Extract data from the filename:
#         name_text = re.search (pattern, file)
#         file_name = file
#         road_no = name_text.group(1)
#         dir_cd = name_text.group(2)
#         lane_no = name_text.group(3)
#         start_kp = name_text.group(4)
#         end_kp = name_text.group(5)
        
#         #location of the access file
#         mdb_file = os.path.join (mdb_folders_path, file)
        
        
#         #Create a connection string
#         conn_string = r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'r'DBQ='+mdb_file+';'
        
#         #The tables 
#         table_name_list = ['Sessions', 'Comments', 'Drops', 'Histories', 'Remarks', 'Stations', 'Timings', 'Transducers', 'Version']
#         df_access_dict = {}
        
#         #Create connection
#         with pyodbc.connect (conn_string) as connection:
#             for table in table_name_list:
#                 query = f'SELECT * FROM {table}'
#                 df = pd.read_sql (query, connection)
#                 data_df = {table:df}
#                 df_access_dict.update (data_df)

#             #Add the GUID value to all tables 

#             #1. Get the GUID value from the Sessions table
#             guid_value = df_access_dict['Sessions']['GUID'][0]
            

#             #2. Add the value to the other tables (except the Sessions!)
#             table_add_guid = ['Comments', 'Drops', 'Histories', 'Remarks', 'Stations', 'Timings', 'Transducers', 'Version']
#             for table2 in table_add_guid:
#                 df_access_dict[table2]['GUID']= guid_value
#                 df = df_access_dict[table2]
#                 data_df = {table2:df}
#                 df_access_dict.update (data_df)

#             #Check the drops and the temperature in the drops table:
#             df = df_access_dict['Drops']

#             #Check the drops 
#             for index in df.index:
#                 if df.loc[index, 'D9'] < 0.5:
#                     df.loc[index, 'D7'] = 0.8 * df.loc[index, 'D6']
#                     df.loc[index, 'D8'] = 0.7 * df.loc[index, 'D7']
#                     df.loc[index, 'D9'] = 0.6 * df.loc[index, 'D8']


#             #Check the temperature
#             avg_asphalt = df_access_dict['Drops'][df['Asphalt']>0]['Asphalt'].mean()
#             avg_surface = df_access_dict['Drops'][df['Asphalt']>0]['Surface'].mean()
#             perct = avg_asphalt/avg_surface
#             for index in df.index:
#                 if df.loc[index, 'Asphalt'] < 0:
#                     df.loc[index, 'Asphalt'] = perct* df.loc[index, 'Surface']

#             df_access_dict.update ({'Drops':df})

#         #Make a summary table for the data
#         df_summary_table = pd.DataFrame (columns = ["file_name", "road_no", "dir_cd","lane_no", "start_kp", "end_kp", "batch_no",
#                                                    "guid"])
#         new_row_values = {
#                 "file_name": file_name,
#                 "road_no": road_no,
#                 "dir_cd": dir_cd,
#                 "lane_no": lane_no,
#                 "start_kp": start_kp,
#                 "end_kp": end_kp,
#                 "batch_no": batch_no,
#                 "guid": guid_value
#             }
        
#         df_summary_table.loc[0]= new_row_values
        
        
        
#         #Update the main dictionnary that contains the data of all files of the batch 
#         sub = {file:{
#                     "road_no":road_no,
#                     "dir_cd": dir_cd,
#                     "lane_no": lane_no,
#                     "start_kp": start_kp,
#                     "end_kp": end_kp,
#                     "batch_no":batch_no,
#                     "GUID": guid_value,
#                     "summary_table":df_summary_table,
#                     "df_access_dict": df_access_dict
#         }
#               }
#         main_files_dict.update(sub)
        
        
#     except:
#         list_files_not_matching.append(file)
        


# To close the connection of a file

In [11]:
mdb_file = r"C:\Users\Khaled Ahmed\Desktop\fwd_analysis_ASH\009_data_for_ASH_program\trial_batch\H70-M-L3-435-458.mdb"
conn_string = r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'r'DBQ='+mdb_file+';'
connection = pyodbc.connect (conn_string)

connection.close()

# To be checked for chuck data upload

In [None]:
################## To be checkeed   for mmore efficient data upload to the database
################################################
import psycopg2
from psycopg2.extras import execute_values

# Convert DataFrame to list of tuples
def df_to_tuples(df):
    return [tuple(x) for x in df.to_numpy()]

# Bulk insert using psycopg2
def bulk_insert(table_name, df, conn):
    tuples = df_to_tuples(df)
    columns = ','.join(df.columns)
    values = ','.join(['%s' for _ in df.columns])
    insert_query = f'INSERT INTO {table_name} ({columns}) VALUES {values}'
    with conn.cursor() as cursor:
        execute_values(cursor, insert_query, tuples)
        conn.commit()

# Create a connection using psycopg2
conn = psycopg2.connect(user=user, password=password, database=database, host=host, port=port)

# Bulk insert each DataFrame
for table_name, df_total in data_for_postgres.items():
    bulk_insert(table_name, df_total, conn)

# Close the connection
conn.close()


# Upload data to the EMPTY ACCESS FILE

In [3]:
#import required libraries
import pandas as pd
import sqlalchemy, pyodbc

#Connect to the postgres database to retreive the table
user = 'postgres'
password = 'mjesbb51187'
database = 'elmod_db'
host = 'localhost'
port = '5432'


In [5]:
engine = sqlalchemy.create_engine (f'postgresql://{user}:{password}@{host}:{port}/{database}')

In [6]:
with engine.connect() as connection:
    query = r'SELECT * FROM drops'
    df = pd.read_sql(query, connection)

In [7]:
df

Unnamed: 0,StationID,DropID,History,DateTime,Asphalt,Surface,Air,Stress,Force,D1,...,D7,D8,D9,D10,D11,D12,D13,D14,D15,GUID
0,1,1,True,2023-01-19 11:36:30,26.480635,28.500000,22.299999,571.0,40.380001,126.400002,...,13.6,10.2,2.3,0.0,0.0,0.0,0.0,0.0,0.0,B447906BDC044069AF7676297E354855
1,1,2,True,2023-01-19 11:36:32,26.480635,28.500000,22.299999,716.0,50.630001,160.600006,...,17.6,13.0,2.3,0.0,0.0,0.0,0.0,0.0,0.0,B447906BDC044069AF7676297E354855
2,2,3,True,2023-01-19 11:38:11,26.299999,27.600000,22.000000,569.0,40.240002,67.099998,...,7.7,6.8,5.5,0.0,0.0,0.0,0.0,0.0,0.0,B447906BDC044069AF7676297E354855
3,2,4,True,2023-01-19 11:38:12,26.299999,27.600000,22.000000,717.0,50.669998,83.400002,...,10.0,8.2,6.3,0.0,0.0,0.0,0.0,0.0,0.0,B447906BDC044069AF7676297E354855
4,3,5,True,2023-01-19 11:39:23,26.299999,27.500000,21.799999,571.0,40.330002,90.800003,...,9.9,7.1,5.2,0.0,0.0,0.0,0.0,0.0,0.0,B447906BDC044069AF7676297E354855
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,30,60,True,2023-01-19 12:20:14,26.500000,28.600000,21.400000,716.0,50.580002,91.099998,...,10.7,8.1,3.6,0.0,0.0,0.0,0.0,0.0,0.0,B447906BDC044069AF7676297E354855
60,31,61,True,2023-01-19 12:21:34,26.500000,28.299999,21.500000,567.0,40.099998,112.900002,...,16.1,9.9,6.6,0.0,0.0,0.0,0.0,0.0,0.0,B447906BDC044069AF7676297E354855
61,31,62,True,2023-01-19 12:21:36,26.500000,28.299999,21.500000,717.0,50.650002,141.300003,...,22.0,12.7,9.0,0.0,0.0,0.0,0.0,0.0,0.0,B447906BDC044069AF7676297E354855
62,32,63,True,2023-01-19 12:22:49,26.500000,27.799999,21.200001,568.0,40.169998,125.300003,...,9.8,5.7,3.6,0.0,0.0,0.0,0.0,0.0,0.0,B447906BDC044069AF7676297E354855


In [11]:
#Connect to the Microsoft Access Database
mdb_file_path = r"C:\Users\Khaled Ahmed\Desktop\fwd_analysis_ASH\Empty_Data_Base.mdb"

conn_str = r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'r'DBQ='+mdb_file_path+';'

In [20]:
# Connect to the Access database
conn = pyodbc.connect(conn_str)
cursor = conn.cursor()

# Define the Access table name
access_table_name = 'Drops'

# Create insert query template with column names enclosed in square brackets
columns = ", ".join([f"[{col}]" for col in df.columns])
placeholders = ", ".join(["?" for _ in df.columns])
insert_query = f"INSERT INTO {access_table_name} ({columns}) VALUES ({placeholders})"

# Print insert query for debugging
print("Insert Query:", insert_query)

# Insert DataFrame into Access table
for index, row in df.iterrows():
    print("Row:", tuple(row))  # Print the row for debugging
    cursor.execute(insert_query, tuple(row))

# Commit the transaction
conn.commit()

# Close the connection
cursor.close()
conn.close()

Insert Query: INSERT INTO Drops ([StationID], [DropID], [History], [DateTime], [Asphalt], [Surface], [Air], [Stress], [Force], [D1], [D2], [D3], [D4], [D5], [D6], [D7], [D8], [D9], [D10], [D11], [D12], [D13], [D14], [D15]) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
Row: (1, 1, True, Timestamp('2023-01-19 11:36:30'), 26.480635302301692, 28.5, 22.299999237060547, 571.0, 40.380001068115234, 126.4000015258789, 97.0999984741211, 76.0, 52.70000076293945, 36.599998474121094, 20.200000762939453, 13.600000381469727, 10.199999809265137, 2.299999952316284, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
Row: (1, 2, True, Timestamp('2023-01-19 11:36:32'), 26.480635302301692, 28.5, 22.299999237060547, 716.0, 50.630001068115234, 160.60000610351562, 122.80000305175781, 97.80000305175781, 68.5, 47.20000076293945, 26.200000762939453, 17.600000381469727, 13.0, 2.299999952316284, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
Row: (2, 3, True, Timestamp('2023-01-19 11:38:11'), 26.299999237060547, 27.60

In [18]:
df.drop(columns = ['GUID'], inplace = True)

In [19]:
df

Unnamed: 0,StationID,DropID,History,DateTime,Asphalt,Surface,Air,Stress,Force,D1,...,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15
0,1,1,True,2023-01-19 11:36:30,26.480635,28.500000,22.299999,571.0,40.380001,126.400002,...,20.200001,13.6,10.2,2.3,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2,True,2023-01-19 11:36:32,26.480635,28.500000,22.299999,716.0,50.630001,160.600006,...,26.200001,17.6,13.0,2.3,0.0,0.0,0.0,0.0,0.0,0.0
2,2,3,True,2023-01-19 11:38:11,26.299999,27.600000,22.000000,569.0,40.240002,67.099998,...,9.700000,7.7,6.8,5.5,0.0,0.0,0.0,0.0,0.0,0.0
3,2,4,True,2023-01-19 11:38:12,26.299999,27.600000,22.000000,717.0,50.669998,83.400002,...,12.100000,10.0,8.2,6.3,0.0,0.0,0.0,0.0,0.0,0.0
4,3,5,True,2023-01-19 11:39:23,26.299999,27.500000,21.799999,571.0,40.330002,90.800003,...,14.500000,9.9,7.1,5.2,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,30,60,True,2023-01-19 12:20:14,26.500000,28.600000,21.400000,716.0,50.580002,91.099998,...,17.900000,10.7,8.1,3.6,0.0,0.0,0.0,0.0,0.0,0.0
60,31,61,True,2023-01-19 12:21:34,26.500000,28.299999,21.500000,567.0,40.099998,112.900002,...,27.799999,16.1,9.9,6.6,0.0,0.0,0.0,0.0,0.0,0.0
61,31,62,True,2023-01-19 12:21:36,26.500000,28.299999,21.500000,717.0,50.650002,141.300003,...,37.299999,22.0,12.7,9.0,0.0,0.0,0.0,0.0,0.0,0.0
62,32,63,True,2023-01-19 12:22:49,26.500000,27.799999,21.200001,568.0,40.169998,125.300003,...,17.600000,9.8,5.7,3.6,0.0,0.0,0.0,0.0,0.0,0.0
