### Installing cx_Oracle

In [1]:
# !pip install cx_oracle

### Importing Packages

In [2]:
import os
from urllib.parse import quote

import cx_Oracle
from sqlalchemy import create_engine

### Oracle Server Credentials

In [3]:
# Connection settings. Each value can be overridden through an environment
# variable so the notebook can target another server without being edited;
# the fallbacks are the original local development values.
# NOTE(review): the fallback account and password are still stored in the
# notebook - set ORACLE_DB_USER / ORACLE_DB_PASSWORD for anything beyond a
# local sandbox.
ip = os.environ.get('ORACLE_DB_HOST', 'localhost')
port = int(os.environ.get('ORACLE_DB_PORT', '1521'))  # keep the port numeric, as before
SID = os.environ.get('ORACLE_DB_SID', 'XE')
USER = os.environ.get('ORACLE_DB_USER', 'sys')
PASSWORD = os.environ.get('ORACLE_DB_PASSWORD', 'oracle')

# Build the connect descriptor for the host / port / SID chosen above.
dsn_tns = cx_Oracle.makedsn(ip, port, SID)

In [4]:
# Show the connect descriptor returned by cx_Oracle.makedsn.
dsn_tns

'(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=localhost)(PORT=1521))(CONNECT_DATA=(SID=XE)))'

### Changing Directory to Oracle Instant Client Directory

In [5]:
import os

# Directory holding the Oracle Instant Client. It can be overridden through the
# ORACLE_DB_CLIENT_DIR environment variable; the fallback is the original
# machine-specific location.
instant_client_dir = os.environ.get("ORACLE_DB_CLIENT_DIR", "E:\\Oracle\\instantclient_19_5")

# NOTE(review): presumably the working directory is switched so the driver can
# locate the client libraries there - confirm. os.chdir changes the working
# directory of the whole process, so relative paths used later resolve
# against this folder.
os.chdir(instant_client_dir)

### To create a connection from Input

We can create either a connection or an Engine.

(A `cx_Oracle` connection has issues when writing to the Oracle database.)

In [6]:
# Connecting as SYSDBA
# conn = cx_Oracle.connect('SYS', 'oracle', dsn_tns, cx_Oracle.SYSDBA)

Or

### To create an Engine from Input

In [7]:
# Build the SQLAlchemy URL for the cx_Oracle dialect.
# The user name and password are percent-encoded so that characters such as
# '@', ':' or '/' inside a credential cannot be mistaken for URL delimiters.
# mode=2 is for SYSDBA.
# NOTE(review): the resulting string embeds the password - avoid echoing it
# into saved notebook output.
db_connection_str = 'oracle+cx_oracle://{user}:{password}@{host}:{port}/{sid}?mode=2'.format(
    user=quote(USER, safe=''),
    password=quote(PASSWORD, safe=''),
    host=ip,
    port=port,
    sid=SID,
)

In [8]:
# Show the assembled URL.
# NOTE(review): the URL contains the password, so this echo stores it in the
# notebook output.
db_connection_str

'oracle+cx_oracle://sys:oracle@localhost:1521/XE?mode=2'

In [9]:
# Create the SQLAlchemy engine from the URL; the read and write cells below
# all pass it as `con`.
db_connection = create_engine(db_connection_str)

### Importing Pandas Dataframe

In [10]:
import pandas as pd

### Reading From Table

In [11]:
# Load the whole SLI_ATTRIBUTES table through the engine. On failure the error
# text is printed instead of a traceback, and `df` is not (re)assigned.
try:
    df = pd.read_sql(
        sql='SELECT * FROM SLI_ATTRIBUTES',
        con=db_connection,
    )
except Exception as read_error:
    print(read_error)

In [12]:
# Display the frame read from SLI_ATTRIBUTES.
df

Unnamed: 0,avg_standard_production_cost,gic,sales_item_id,purchasing_group,sales_item_description,product_lifecycle_state


### Checking Table Schema

In [13]:
# List the column labels of the frame read from the database.
df.columns

Index(['avg_standard_production_cost', 'gic', 'sales_item_id',
       'purchasing_group', 'sales_item_description',
       'product_lifecycle_state'],
      dtype='object')

### Reading from CSV File

In [14]:
# Parse the tab-separated export into a dataframe.
file_df = pd.read_csv(
    'F:\\My_Practice\\Database_Connector\\Resources\\SI_attr.tab',
    sep='\t',
    encoding='utf-8',
)

### Displaying Top 5 Rows

In [15]:
# Preview the first rows of the parsed file (head() defaults to 5 rows).
file_df.head()

Unnamed: 0,Sales_Item_Id,Sales_Item_Description,GIC,Product_Lifecycle_State,Purchasing_Group,Average Standard Production Cost
0,IE2:15HP-RFU-7,7GHZ HP ODU,7599,OBSOLETE,,0.0
1,IE2:15HP-RFU-8,"1500HP RF UNIT, FGHZ (8GHZ)",7599,OBSOLETE,,0.0
2,IE2:15HP-SHORT-112,"1500HP SHORT, FGHZ",7599,OBSOLETE,,0.0
3,IE2:15HP-SHORT-137,"1500P SHORT, FGHZ",7599,OBSOLETE,,0.0
4,IE2:15HP-TERM-112,"1500HP 50 OHM TERMINATION, FGHZ",9208,OBSOLETE,,0.0


### Count

In [16]:
# Number of rows parsed from the file.
len(file_df)

361560

### Renaming the last column and saving back to dataframe

In [17]:
# Rename the cost column to AVG_STANDARD_PRODUCTION_COST and rebind file_df to
# the renamed frame; all other column labels are left as they are.
file_df = file_df.rename(
    columns={'Average Standard Production Cost': 'AVG_STANDARD_PRODUCTION_COST'}
)

### Checking Column Names

In [18]:
# Confirm the rename took effect.
file_df.columns

Index(['Sales_Item_Id', 'Sales_Item_Description', 'GIC',
       'Product_Lifecycle_State', 'Purchasing_Group',
       'AVG_STANDARD_PRODUCTION_COST'],
      dtype='object')

### Converting all headers to Upper Case

In [19]:
# Upper-case every column label.
file_df.columns = [label.upper() for label in file_df.columns]

### Verifying Column Names

In [20]:
# Confirm every column label is now upper case.
file_df.columns

Index(['SALES_ITEM_ID', 'SALES_ITEM_DESCRIPTION', 'GIC',
       'PRODUCT_LIFECYCLE_STATE', 'PURCHASING_GROUP',
       'AVG_STANDARD_PRODUCTION_COST'],
      dtype='object')

### Dropping Null Values from Primary Key

In [21]:
# Discard rows that have no SALES_ITEM_ID and rebind file_df to the result.
file_df = file_df.dropna(subset=['SALES_ITEM_ID'])

### Checking length of dataframe after dropping null values

In [22]:
# Row count after removing rows without a SALES_ITEM_ID.
len(file_df)

361559

### Checking any Duplicate Records for Primary Key

In [23]:
# True if any SALES_ITEM_ID value occurs more than once.
file_df['SALES_ITEM_ID'].duplicated().any()

False

Or

In [24]:
# Same check expressed at the dataframe level, keyed on SALES_ITEM_ID.
file_df.duplicated(subset=['SALES_ITEM_ID']).any()

False

### Dropping Duplicate Records from Primary Key

In [25]:
# Keep one row per SALES_ITEM_ID and rebind file_df to the de-duplicated frame.
file_df = file_df.drop_duplicates(subset=['SALES_ITEM_ID'])

In [26]:
# Row count after de-duplicating on SALES_ITEM_ID.
len(file_df)

361559

### Checking Shape of the Dataframe

In [27]:
# (rows, columns) of the cleaned dataframe.
file_df.shape

(361559, 6)

### Limiting to 1000 records

In [28]:
# Keep only the first SAMPLE_SIZE rows for the trial load.
# .copy() makes the sample an independent frame instead of a slice of the full
# dataframe, so later column updates do not raise SettingWithCopyWarning.
SAMPLE_SIZE = 1000
file_df = file_df.iloc[:SAMPLE_SIZE].copy()

In [29]:
# Confirm the size of the sample.
len(file_df)

1000

### Writing to ORACLE Database

In [30]:
# Append the rows to SLI_ATTRIBUTES through the engine. A failure is reported
# as printed text rather than a traceback.
try:
    file_df.to_sql(
        name='SLI_ATTRIBUTES',
        con=db_connection,
        if_exists='append',
        index=False,
    )
except Exception as write_error:
    print(write_error)
    print("Data saving failed")
else:
    print("Data Saved Successfully")

'ascii' codec can't encode character '\xa0' in position 18: ordinal not in range(128)
Data saving failed


### Checking Datatype of Each Column

In [31]:
# Print each column label with its dtype, followed by a blank separator line.
for column_name, column_dtype in file_df.dtypes.items():
    print('Column: {}'.format(column_name))
    print('Datatype of {} is {}'.format(column_name, column_dtype))
    print()

Column: SALES_ITEM_ID
Datatype of SALES_ITEM_ID is object

Column: SALES_ITEM_DESCRIPTION
Datatype of SALES_ITEM_DESCRIPTION is object

Column: GIC
Datatype of GIC is object

Column: PRODUCT_LIFECYCLE_STATE
Datatype of PRODUCT_LIFECYCLE_STATE is object

Column: PURCHASING_GROUP
Datatype of PURCHASING_GROUP is object

Column: AVG_STANDARD_PRODUCTION_COST
Datatype of AVG_STANDARD_PRODUCTION_COST is float64



### Removing non-ASCII characters from each string column

In [32]:
# Strip every run of non-ASCII characters from the object-dtype (text) columns.
# NOTE(review): this discards data (e.g. the '\xa0' reported by the failed
# write). The 'ascii' codec error may instead be avoidable by configuring the
# driver/engine encoding - confirm before relying on this for real loads.
#
# Work on a private copy and assign each cleaned column back explicitly.
# Calling .replace(..., inplace=True) on file_df[c] is chained assignment: it
# is not guaranteed to write through to file_df, and with pandas copy-on-write
# it leaves file_df unchanged.
file_df = file_df.copy()
for c in file_df.columns:
    if file_df[c].dtype == 'object':
        file_df[c] = file_df[c].replace({r'[^\x00-\x7F]+': ''}, regex=True)

In [33]:
# Retry the append into sli_attributes now that the text columns are cleaned.
# A failure is reported as printed text rather than a traceback.
try:
    file_df.to_sql(
        name='sli_attributes',
        con=db_connection,
        if_exists='append',
        index=False,
    )
except Exception as write_error:
    print(write_error)
    print("Data saving failed")
else:
    print("Data Saved Successfully")

Data Saved Successfully


### Disposing Engine

In [34]:
# Dispose of the engine now that the load is finished.
db_connection.dispose()