# Connection to the SQL Server

In [28]:
import pyodbc
import os

# Connection settings come from the environment so secrets do not have to live
# in the notebook: MSSQL_DRIVER, MSSQL_SERVER, MSSQL_UID, MSSQL_PWD.
# NOTE(review): the literal fallbacks keep the notebook runnable as before;
# remove them (especially the password) once the environment variables are set.
sql_driver = os.environ.get("MSSQL_DRIVER", "Odbc Driver 17 for SQL Server")
sql_server = os.environ.get("MSSQL_SERVER", "DESKTOP-4IVU8N4\\MSSQLSERVER1")
sql_user = os.environ.get("MSSQL_UID", "sa")
sql_password = os.environ.get("MSSQL_PWD", "abcd")

try:
    # autocommit=True: this connection is used for DDL (CREATE DATABASE), which
    # SQL Server refuses to run inside the implicit transaction pyodbc opens
    # by default.
    con = pyodbc.connect(
        f"DRIVER={{{sql_driver}}};"
        f"SERVER={sql_server};"
        f"UID={sql_user};"
        f"PWD={sql_password};",
        autocommit=True,
    )
    print("Connected successfully")
except Exception as e:
    # Best-effort: report the failure instead of raising. `con` stays undefined
    # in that case, so later cells that use it will fail.
    print("Failed to connect:", e)

Connected successfully


# Database Creation through Python

In [29]:
try:
    # CREATE DATABASE is not allowed inside a multi-statement transaction, and
    # pyodbc connections start one implicitly unless autocommit is enabled.
    con.autocommit = True
    cursor = con.cursor()
    demo = 'demo'
    # Only create the database when it is missing so the cell can be re-run.
    # `demo` is a notebook constant, not user input, so interpolation is safe here.
    create_db_query = f"IF DB_ID(N'{demo}') IS NULL CREATE DATABASE [{demo}];"
    cursor.execute(create_db_query)
    print("database created successfully")
except Exception as e:
    # Show the underlying error; without it the failure cannot be diagnosed.
    print("Database was not created successfully!!, Try again", e)

Database was not created successfully!!, Try again


In [30]:
import pandas as pd
df_customers= pd.read_csv("us_customer_data 1.csv")

In [31]:
df_customers.dtypes

customer_id           int64
name                 object
email                object
phone                object
address              object
registration_date    object
loyalty_status       object
dtype: object

In [32]:
# Convert the registration_date strings to datetime64 values, overwriting the
# column in place. dayfirst=True makes pandas read ambiguous dates as DD/MM.
# NOTE(review): confirm the CSV really stores day-first dates; if it is
# ISO (YYYY-MM-DD) the flag is unnecessary.
df_customers['registration_date']=pd.to_datetime(df_customers['registration_date'],dayfirst=True)

In [33]:
df_customers.dtypes

customer_id                   int64
name                         object
email                        object
phone                        object
address                      object
registration_date    datetime64[ns]
loyalty_status               object
dtype: object

# Tables Insertion

In [34]:
from sqlalchemy import create_engine

In [35]:
try:
    # The backslash between host and instance name is escaped explicitly;
    # a bare "\M" is an invalid escape sequence and triggers a warning on
    # recent Python versions. The resulting URL is unchanged.
    # NOTE(review): credentials are embedded in the URL; consider loading them
    # from the environment instead.
    engine = create_engine(
        'mssql+pyodbc://sa:abcd@DESKTOP-4IVU8N4\\MSSQLSERVER1/demo'
        '?driver=Odbc+Driver+17+for+SQL+Server'
    )
    # create_engine() does not open a connection by itself; connect once so
    # the success message reflects a real round-trip to the server.
    with engine.connect():
        pass
    print("database connection done")
except Exception as e:
    # Include the error so a failed connection can be diagnosed.
    print("not done", e)

database connection done


In [36]:
df_transaction = pd.read_csv("transaction_data.csv")
df_orders = pd.read_csv("order_data 1.csv")

In [37]:
try:
    # Target table name paired with the frame that populates it, written in
    # this order; an existing table of the same name is replaced.
    frames_by_table = (
        ('Customers', df_customers),
        ("Transactions", df_transaction),
        ("Orders", df_orders),
    )
    for table_name, frame in frames_by_table:
        frame.to_sql(table_name, con=engine, if_exists='replace', index=False)
    print("Tables are inserted")
except Exception as e:
    print("Bro it is not inserted!!", e)

Tables are inserted


# Data Transformation Requirements

#### 1. Name Processing 
#### Task: Split the Name column into two separate fields:
#### First Name, Last Name 
#### Use the space as a separator for splitting.
#### Additional Processing:
#### Remove common prefixes:
#### Examples: Mr., Mrs., Miss, Dr.
#### Remove common suffixes:
#### Examples: Jr., Sr., II, III

In [38]:
import spacy

In [39]:
df_customers.head(2)

Unnamed: 0,customer_id,name,email,phone,address,registration_date,loyalty_status
0,1,Michelle Kidd,vayala@example.net,619-723-4258,"USNS Santiago, FPO AE 80872",2025-01-25,Gold
1,2,Brad Newton,taylorcatherine@example.net,537-674-1158,"38783 Oliver Street, West Kristenborough, MT 9...",2023-07-13,Silver


In [40]:
nlp=spacy.load("en_core_web_sm")

In [41]:
# Titles and suffixes to strip. Tokens are compared case-insensitively after
# dots and trailing commas are removed ("Dr." -> "dr", "Ph.D." -> "phd").
_NAME_PREFIXES = frozenset({'mr', 'mrs', 'ms', 'miss', 'dr', 'prof'})
_NAME_SUFFIXES = frozenset({'jr', 'sr', 'ii', 'iii', 'iv', 'md', 'phd', 'dds', 'dvm'})


def clean_name(name):
    """Return ``name`` without leading titles and trailing suffixes.

    The name is split on whitespace. Known prefixes (Mr., Mrs., Miss, Dr., ...)
    are removed from the front and known suffixes (Jr., Sr., II, III, ...) from
    the back. Tokens in the middle are never touched, and at least one token is
    always kept.

    A part-of-speech filter is not used here because titles such as "Dr." are
    themselves tagged as proper nouns and would be kept.

    Parameters
    ----------
    name : object
        Raw customer name; converted with ``str``. ``None`` and NaN yield ``''``.

    Returns
    -------
    str
        The remaining tokens joined by single spaces.
    """
    # Treat missing values as empty instead of producing the text "nan".
    if name is None or (isinstance(name, float) and name != name):
        return ''

    def _normalise(token):
        return token.replace('.', '').rstrip(',').lower()

    tokens = str(name).split()
    while len(tokens) > 1 and _normalise(tokens[0]) in _NAME_PREFIXES:
        tokens.pop(0)
    while len(tokens) > 1 and _normalise(tokens[-1]) in _NAME_SUFFIXES:
        tokens.pop()
    if tokens:
        # "Brown, Sr." leaves "Brown," behind once the suffix is removed.
        tokens[-1] = tokens[-1].rstrip(',')
    return ' '.join(tokens)

In [42]:
df_customers['clean_name']=df_customers['name'].apply(clean_name)

In [43]:
# Tokenise the cleaned name once; the first token becomes first_name and the
# final token becomes last_name.
name_tokens = df_customers['clean_name'].str.split()
df_customers['first_name'] = name_tokens.str[0]
df_customers['last_name'] = name_tokens.str[-1]

In [44]:
df_customers[['name','clean_name','first_name','last_name']]

Unnamed: 0,name,clean_name,first_name,last_name
0,Michelle Kidd,Michelle Kidd,Michelle,Kidd
1,Brad Newton,Brad Newton,Brad,Newton
2,Larry Torres,Larry Torres,Larry,Torres
3,Kimberly Price,Kimberly Price,Kimberly,Price
4,Matthew Phillips,Matthew Phillips,Matthew,Phillips
...,...,...,...,...
995,Jerry Mcdaniel,Jerry Mcdaniel,Jerry,Mcdaniel
996,Jodi Simpson,Jodi Simpson,Jodi,Simpson
997,Crystal Brown,Crystal Brown,Crystal,Brown
998,Gregory Duarte,Gregory Duarte,Gregory,Duarte


In [45]:
df_customers

Unnamed: 0,customer_id,name,email,phone,address,registration_date,loyalty_status,clean_name,first_name,last_name
0,1,Michelle Kidd,vayala@example.net,619-723-4258,"USNS Santiago, FPO AE 80872",2025-01-25,Gold,Michelle Kidd,Michelle,Kidd
1,2,Brad Newton,taylorcatherine@example.net,537-674-1158,"38783 Oliver Street, West Kristenborough, MT 9...",2023-07-13,Silver,Brad Newton,Brad,Newton
2,3,Larry Torres,dsanchez@example.net,810-256-4505,"6845 Steele Turnpike, West Erikabury, UT 37487",2023-08-18,Bronze,Larry Torres,Larry,Torres
3,4,Kimberly Price,jessicaknight@example.com,423-222-9779,"1631 Alexis Meadows, Lake Amanda, CA 75179",2024-12-08,Gold,Kimberly Price,Kimberly,Price
4,5,Matthew Phillips,qwilliams@example.com,220-763-3522,"2274 Williams Heights Suite 895, Andersonhaven...",2024-02-03,Gold,Matthew Phillips,Matthew,Phillips
...,...,...,...,...,...,...,...,...,...,...
995,996,Jerry Mcdaniel,walkerlisa@example.net,638-989-9441,"34746 Smith Gateway, New Sarah, AS 12715",2025-02-10,Silver,Jerry Mcdaniel,Jerry,Mcdaniel
996,997,Jodi Simpson,eric24@example.org,483-625-2940,"2876 Tucker Road Suite 947, North Tommyborough...",2024-04-18,Bronze,Jodi Simpson,Jodi,Simpson
997,998,Crystal Brown,pshaffer@example.net,390-747-3088,"095 Janice Forest Suite 570, Boltonmouth, DE 7...",2024-08-30,Bronze,Crystal Brown,Crystal,Brown
998,999,Gregory Duarte,caitlindunlap@example.org,257-409-8196,"Unit 6377 Box 7662, DPO AP 03300",2024-05-16,Gold,Gregory Duarte,Gregory,Duarte


### 2. Country Code Extraction and Mobile Number Enrichment
###    Task: Extract the country code from the Address field.
###    Enrichment: Map the extracted country code to its respective international phone dialing code.
###    Prepend this dialing code to the customer's Mobile Number to form a complete, internationally formatted phone number. 

In [46]:
country_codes=pd.read_csv('Country-codes.csv',encoding ='latin1')

In [47]:
def country(df):
    """Add a ``Country_Code`` column holding the two-letter code before the ZIP.

    The code is the pair of capital letters immediately preceding a trailing
    5-digit ZIP (optionally ZIP+4) in ``df['address']``. Rows whose address is
    missing or does not end that way get NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``address`` column; modified in place.

    Returns
    -------
    None
    """
    # Operate on the frame that was passed in rather than a notebook global,
    # and anchor on the ZIP pattern instead of fixed character offsets.
    df['Country_Code'] = df['address'].str.extract(
        r'\b([A-Z]{2})\s+\d{5}(?:-\d{4})?\s*$', expand=False
    )

In [48]:
country(df_customers)

In [49]:
# Replace the hyphen separators in each phone number with spaces
# (e.g. "619-723-4258" -> "619 723 4258"), overwriting the column in place.
df_customers['phone']=df_customers['phone'].str.replace("-"," ")

In [50]:
# Prefix every phone number with the international dialing code that matches
# the row's Country_Code, falling back to '+1' when the code is not in the
# lookup table (or the table has no dialing code for it).
# NOTE(review): the two-letter code taken from US-style addresses looks like a
# state/military code (e.g. 'MT', 'AE'), which collides with ISO country
# codes; confirm that this lookup is really the intended mapping.
df_customers['phone'] = df_customers['phone'].astype(str)

# One dialing code per country code; the first occurrence wins, which matches
# a top-to-bottom scan of the lookup table.
dial_lookup = (
    country_codes
    .dropna(subset=['Country_code', 'International_dialing'])
    .drop_duplicates(subset='Country_code', keep='first')
    .set_index('Country_code')['International_dialing']
    .astype(str)
)
dial_prefix = df_customers['Country_Code'].map(dial_lookup).fillna('+1')

# Skip numbers that already carry a '+' prefix so re-running the cell does not
# stack a second dialing code in front of the first.
needs_prefix = ~df_customers['phone'].str.startswith('+')
df_customers.loc[needs_prefix, 'phone'] = (
    dial_prefix[needs_prefix] + ' ' + df_customers.loc[needs_prefix, 'phone']
)

In [51]:
df_customers.head()

Unnamed: 0,customer_id,name,email,phone,address,registration_date,loyalty_status,clean_name,first_name,last_name,Country_Code
0,1,Michelle Kidd,vayala@example.net,+971 619 723 4258,"USNS Santiago, FPO AE 80872",2025-01-25,Gold,Michelle Kidd,Michelle,Kidd,AE
1,2,Brad Newton,taylorcatherine@example.net,+356 537 674 1158,"38783 Oliver Street, West Kristenborough, MT 9...",2023-07-13,Silver,Brad Newton,Brad,Newton,MT
2,3,Larry Torres,dsanchez@example.net,+1 810 256 4505,"6845 Steele Turnpike, West Erikabury, UT 37487",2023-08-18,Bronze,Larry Torres,Larry,Torres,UT
3,4,Kimberly Price,jessicaknight@example.com,+1 423 222 9779,"1631 Alexis Meadows, Lake Amanda, CA 75179",2024-12-08,Gold,Kimberly Price,Kimberly,Price,CA
4,5,Matthew Phillips,qwilliams@example.com,+1 220 763 3522,"2274 Williams Heights Suite 895, Andersonhaven...",2024-02-03,Gold,Matthew Phillips,Matthew,Phillips,OR


### 3. Customer Classification
Task:
Add a new column named Customer_Tier based on the following mappings:

In [52]:
df_customers['loyalty_status'].unique()

array(['Gold', 'Silver', 'Bronze'], dtype=object)

In [53]:
# The count of unique field values
df_customers['loyalty_status'].nunique()

3

In [54]:
status = df_customers['loyalty_status'].unique()

In [55]:
# The count of unique field values
len(df_customers['loyalty_status'].unique())

3

In [56]:
# Fixed tier codes: Gold -> 1, Silver -> 2, Bronze -> 3. Pinning the order
# keeps the codes stable; deriving them from the order in which values first
# appear in the data would renumber the tiers whenever the rows are reordered.
KNOWN_TIERS = ['Gold', 'Silver', 'Bronze']
# Any status outside the known list is still given a code, appended after the
# known tiers in sorted order so the result stays deterministic.
unexpected_tiers = sorted(
    (tier for tier in status if pd.notna(tier) and tier not in KNOWN_TIERS),
    key=str,
)
value_maps = {
    tier: code
    for code, tier in enumerate(KNOWN_TIERS + unexpected_tiers, start=1)
}

In [57]:
# Translate each loyalty_status label into its numeric code from value_maps and
# store it in a new 'loyal_status' column.
# NOTE(review): the task text asks for a column named Customer_Tier; confirm
# whether 'loyal_status' is the intended name.
df_customers['loyal_status']=df_customers['loyalty_status'].map(value_maps)

In [58]:
df_customers['loyal_status'].unique()

array([1, 2, 3])

In [59]:
df_customers.head()

Unnamed: 0,customer_id,name,email,phone,address,registration_date,loyalty_status,clean_name,first_name,last_name,Country_Code,loyal_status
0,1,Michelle Kidd,vayala@example.net,+971 619 723 4258,"USNS Santiago, FPO AE 80872",2025-01-25,Gold,Michelle Kidd,Michelle,Kidd,AE,1
1,2,Brad Newton,taylorcatherine@example.net,+356 537 674 1158,"38783 Oliver Street, West Kristenborough, MT 9...",2023-07-13,Silver,Brad Newton,Brad,Newton,MT,2
2,3,Larry Torres,dsanchez@example.net,+1 810 256 4505,"6845 Steele Turnpike, West Erikabury, UT 37487",2023-08-18,Bronze,Larry Torres,Larry,Torres,UT,3
3,4,Kimberly Price,jessicaknight@example.com,+1 423 222 9779,"1631 Alexis Meadows, Lake Amanda, CA 75179",2024-12-08,Gold,Kimberly Price,Kimberly,Price,CA,1
4,5,Matthew Phillips,qwilliams@example.com,+1 220 763 3522,"2274 Williams Heights Suite 895, Andersonhaven...",2024-02-03,Gold,Matthew Phillips,Matthew,Phillips,OR,1


# Combining customer and order data from a SQL Server database for a unified customer view.

In [60]:
# Join the in-memory customer and order frames on customer_id. No `how` is
# given, so pandas performs an inner join: customers without orders (and
# orders without a matching customer) are dropped.
# NOTE(review): this merges the DataFrames held in the notebook, not the
# tables written to SQL Server earlier; confirm that is intended.
combination_df=pd.merge(df_customers,df_orders,on='customer_id')

In [61]:
# Fill missing e-mail addresses with a placeholder built from the customer's
# name: spaces removed, lower-cased, with '@unknown.com' appended.
combination_df['email'] = combination_df['email'].fillna(combination_df['name'].str.replace(' ','').str.lower()+'@unknown.com')

In [62]:
combination_df.isna().sum()

customer_id          0
name                 0
email                0
phone                0
address              0
registration_date    0
loyalty_status       0
clean_name           0
first_name           0
last_name            0
Country_Code         0
loyal_status         0
order_id             0
order_date           0
order_amount         0
order_status         0
product_category     0
dtype: int64

In [63]:
# Write the merged customer/order frame to the 'Custoder' table through the
# SQLAlchemy engine, replacing the table if it already exists and omitting the
# DataFrame index.
combination_df.to_sql('Custoder',con=engine,if_exists='replace',index=False)

32