In [1]:
import pandas as pd 
import numpy as np

In [2]:
# Location of the source workbook (legacy .xls) that holds the provider network.
path = r"C:\Users\zaid.allawanseh\Local Chatbot\data\Medical Network arabic. update 27-10-2025.xls"

# Open the workbook in a context manager so its file handle is released as
# soon as the sheet names have been read (an unclosed ExcelFile keeps the
# file open until garbage collection).
with pd.ExcelFile(path) as workbook:
    sheet_names = workbook.sheet_names
print(sheet_names)

['أطباء ', 'أسنان', 'صيدليات', 'مختبرات', 'مراكز متخصصة', 'البصريات', 'أشعة', 'مستشفيات']


In [3]:
def _clean_label(value):
    """Return *value* with surrounding whitespace removed when it is a string."""
    return value.strip() if isinstance(value, str) else value


def process_doctors_sheet(path, sub_sheet_name, header_marker='العنوان'):
    """Flatten one sheet of the workbook into a tidy provider table.

    Layout the code relies on:
      * only the first 5 columns carry data;
      * every section has a header row whose 4th column equals
        ``header_marker`` (compared after stripping whitespace);
      * the row directly above a header row holds the section (sub category)
        title in its first column;
      * the title of the first section is taken from the label of the first
        column, i.e. the row ``read_excel`` used as the header.

    Parameters
    ----------
    path : str or path-like
        Workbook passed straight to ``pd.read_excel``.
    sub_sheet_name : str
        Sheet to read; also stored (whitespace-stripped) as 'Main Category'.
    header_marker : str, default 'العنوان'
        Text in the 4th column that identifies a section header row.

    Returns
    -------
    pandas.DataFrame
        Columns 'Name', 'location', 'Phone', 'Main Category', 'Sub Category',
        with header rows and section-title rows removed and a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the sheet has fewer than 5 columns. Without this check the
        positional renaming would silently mislabel the remaining columns.
    """
    raw = pd.read_excel(path, sheet_name=sub_sheet_name)
    if raw.shape[1] < 5:
        raise ValueError(
            f"Sheet {sub_sheet_name!r} has {raw.shape[1]} column(s); "
            "at least 5 are required."
        )

    # Keep only the first 5 columns (the rest is null). Work on a copy so
    # nothing below writes into a slice of the frame returned by read_excel.
    df = raw.iloc[:, :5].copy()
    n_rows = len(df)

    # Integer positions of the section header rows.
    is_marker = np.asarray(
        df.iloc[:, 3].astype(str).str.strip() == header_marker, dtype=bool
    )
    positions = np.flatnonzero(is_marker)

    # Every row starts with the first section's title; rows after each later
    # header row are overwritten with that section's title.
    sub_category = pd.Series([_clean_label(df.columns[0])] * n_rows, dtype=object)
    for i, pos in enumerate(positions):
        if pos == 0:
            continue  # nothing above row 0: its title is the column label
        title = _clean_label(df.iloc[pos - 1, 0])
        stop = positions[i + 1] if i + 1 < len(positions) else n_rows
        sub_category.iloc[pos + 1:stop] = title

    # Drop the header rows and the title rows directly above them. The mask is
    # positional, so it does not depend on the frame's index labels.
    drop = is_marker.copy()
    drop[positions[positions > 0] - 1] = True
    keep = ~drop

    # Columns 0 and 1 are discarded; 2, 3 and 4 become Name / location / Phone.
    result = df.loc[keep].iloc[:, 2:5].reset_index(drop=True)
    result.columns = ['Name', 'location', 'Phone']
    result['Main Category'] = _clean_label(sub_sheet_name)
    result['Sub Category'] = sub_category[keep].to_numpy()
    return result

In [4]:
# Process every sheet first, then concatenate once: growing a DataFrame with
# concat inside the loop re-copies all accumulated rows on each iteration.
sheet_frames = [process_doctors_sheet(path, sheet) for sheet in sheet_names]

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
main_df = pd.concat(sheet_frames, ignore_index=True) if sheet_frames else pd.DataFrame()

In [5]:
# Preview the first rows of the combined table as a quick sanity check.
main_df.head()

Unnamed: 0,Name,location,Phone,Main Category,Sub Category
0,مخلص مزاهرة,الدوار السابع,5820425,أطباء,الطب العام
1,عيادة ابن النفيس الطبية,الدوار السابع,5858343,أطباء,الطب العام
2,جوسانتي للرعاية الصحية,الدوار السابع,5804444,أطباء,الطب العام
3,اويس الرواشدة,الدوار السابع,797113535,أطباء,الطب العام
4,احمد صالح ابو رمان,الدوار الثامن,790076768,أطباء,الطب العام


In [6]:
# `import urllib` alone does not guarantee that the `parse` submodule is
# loaded; import it explicitly (this still binds the name `urllib`).
import urllib.parse
from sqlalchemy import create_engine
from sqlalchemy.types import NVARCHAR

# Raw ODBC connection string; Trusted_Connection=yes means no user name or
# password is embedded here.
conn_str = (
    "DRIVER={SQL Server};"
    "SERVER=zaid-allawanseh;"             # or your server name / instance
    "DATABASE=InsuranceNetwork;"
    "Trusted_Connection=yes;"
)
# URL-encode the whole string so it can be passed through the
# `odbc_connect` query parameter of the SQLAlchemy URL.
params = urllib.parse.quote_plus(conn_str)
# NOTE(review): the next cell rebinds `engine` using a different ODBC driver;
# only one of the two engine definitions is needed.
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

In [7]:
server = 'zaid-allawanseh'  # e.g., 'localhost' or 'your_server_ip'
database = 'InsuranceNetwork'

# SQLAlchemy URL for the pyodbc driver. No user name or password is embedded;
# presumably the connection relies on Windows integrated authentication.
# TODO confirm for the target server.
connection_string = (
    f'mssql+pyodbc://{server}/{database}?'
    'driver=ODBC+Driver+17+for+SQL+Server'
)

# Create a SQLAlchemy engine. fast_executemany is enabled here, on the engine,
# so that bulk inserts issued through it (e.g. DataFrame.to_sql) use pyodbc's
# batched parameter binding; setting the flag on a separate raw cursor does
# not affect connections the engine hands out.
engine = create_engine(connection_string, fast_executemany=True)

In [8]:
# Optional CSV export (disabled):
#main_df.to_csv(r"C:\Users\dana.alnadi\Documents\medical_network_cleaned.csv", index=False, encoding='utf-8-sig')
from sqlalchemy.types import NVARCHAR, Integer

# Explicit NVARCHAR column types for the target table, so every column
# (including Phone) is created as Unicode text with a bounded length.
dtype_mapping = {
    'Name': NVARCHAR(200),
    'location': NVARCHAR(200),
    'Phone': NVARCHAR(100),
    'Main Category': NVARCHAR(100),
    'Sub Category': NVARCHAR(100)
}

# Write the combined table, replacing any existing 'FullNetwork' table.
# to_sql obtains its own connection from `engine`, so no raw connection or
# cursor is opened here: a fast_executemany flag set on a separate raw cursor
# would not apply to that connection. To speed up the insert, enable it where
# the engine is built: create_engine(..., fast_executemany=True).
main_df.to_sql(
    name='FullNetwork',
    con=engine,
    if_exists='replace',
    index=False,
    dtype=dtype_mapping
)