#### Importing necessary libraries

In [205]:
import pandas as pd
import os

#### Importing data

In [206]:
# The input CSV lives in a sibling folder of the current working directory.
path_parts = ('..', 'starting_file', 'drug_starting.csv')
relative_path = os.path.join(*path_parts)

# Resolve it against the working directory so the file actually opened is unambiguous.
absolute_path = os.path.abspath(relative_path)

# Load the raw data; all later cells operate on this frame.
df = pd.read_csv(filepath_or_buffer=absolute_path)

#### Viewing DataFrame

In [207]:
# Show the freshly loaded frame (the notebook's rich repr elides the middle rows).
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,\r\n\t\t\t\t\tLevofloxacin is used to treat a ...
1,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,\r\n\t\t\t\t\tLevofloxacin is used to treat a ...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755 Reviews,2.78,3.00,2.08,\r\n\t\t\t\t\t This is a generic drug. The ave...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,\r\n\t\t\t\t\tAzithromycin is an antibiotic (m...
4,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,\r\n\t\t\t\t\tAzithromycin is an antibiotic (m...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tClotrimazole is used to treat sk...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tThis medication is used to treat...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1 Reviews,5.00,4.00,5.00,\r\n\t\t\t\t\tClotrimazole is used to treat sk...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tThis medication is used to treat...


#### Viewing DataFrame shape

In [208]:
# (rows, columns) of the loaded frame.
df.shape

(2219, 9)

#### Checking for null values

In [209]:
# Count missing values per column (isna is the same check as isnull).
missing_mask = df.isna()
null_counts = missing_mask.sum()

print(null_counts)

Condition       0
Drug            0
Indication      0
Type            0
Reviews         0
Effective       0
EaseOfUse       0
Satisfaction    0
Information     0
dtype: int64


#### Viewing column names

In [210]:
# List the column labels to confirm the names used by the cleaning steps below.
df.columns

Index(['Condition', 'Drug', 'Indication', 'Type', 'Reviews', 'Effective',
       'EaseOfUse', 'Satisfaction', 'Information'],
      dtype='object')

#### Removing double quotes from specific columns

In [211]:
# Text columns that may carry stray double-quote characters
columns_to_clean = ['Condition', 'Drug', 'Indication', 'Type', 'Reviews']

# Drop every double quote from each listed column, one column (Series) at a time
df[columns_to_clean] = df[columns_to_clean].apply(
    lambda text_col: text_col.str.replace('"', '')
)

#### Viewing cleaned DataFrame

In [212]:
# Re-display the frame to inspect the result of the double-quote removal.
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,\r\n\t\t\t\t\tLevofloxacin is used to treat a ...
1,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,\r\n\t\t\t\t\tLevofloxacin is used to treat a ...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755 Reviews,2.78,3.00,2.08,\r\n\t\t\t\t\t This is a generic drug. The ave...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,\r\n\t\t\t\t\tAzithromycin is an antibiotic (m...
4,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,\r\n\t\t\t\t\tAzithromycin is an antibiotic (m...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tClotrimazole is used to treat sk...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tThis medication is used to treat...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1 Reviews,5.00,4.00,5.00,\r\n\t\t\t\t\tClotrimazole is used to treat sk...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tThis medication is used to treat...


#### Removing \r, \n, and \t characters from the 'Information' column

In [213]:
# Carriage returns, newlines and tabs are each replaced by the empty string.
whitespace_removals = {'\r': '', '\n': '', '\t': ''}
df['Information'] = df['Information'].replace(to_replace=whitespace_removals, regex=True)

In [214]:
# Re-display the frame to check that 'Information' no longer starts with \r\n\t runs.
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
1,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755 Reviews,2.78,3.00,2.08,This is a generic drug. The average cash pric...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
4,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2 Reviews,5.00,5.00,5.00,Clotrimazole is used to treat skin infections ...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1 Reviews,5.00,5.00,5.00,This medication is used to treat vaginal yeast...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1 Reviews,5.00,4.00,5.00,Clotrimazole is used to treat skin infections ...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1 Reviews,5.00,5.00,5.00,This medication is used to treat vaginal yeast...


#### Removing non-numeric characters from the 'Reviews' column

In [215]:
# Strip every run of non-digit characters so that e.g. "994 Reviews" becomes "994".
# regex=True is passed explicitly: since pandas 2.0, Series.str.replace treats the
# pattern as a literal string by default, which would turn r'\D+' into a silent no-op.
df['Reviews'] = df['Reviews'].str.replace(r'\D+', '', regex=True)

#### Removing the word "Reviews" from the 'Reviews' column

In [217]:
# Remove the literal word "Reviews" from each value; values that do not contain
# the word are left unchanged.
reviews_text = df['Reviews']
df['Reviews'] = reviews_text.str.replace('Reviews', '')

In [218]:
# Re-display the frame to confirm 'Reviews' now holds only the numeric part.
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
1,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755,2.78,3.00,2.08,This is a generic drug. The average cash pric...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
4,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2,5.00,5.00,5.00,Clotrimazole is used to treat skin infections ...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1,5.00,4.00,5.00,Clotrimazole is used to treat skin infections ...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...


#### Converting Reviews column to int

In [219]:
# Cast 'Reviews' to an integer dtype unless it is already int64.
# NOTE(review): astype(int) yields the platform's default integer; the recorded
# dtypes output shows int32, in which case this guard stays true on re-runs
# (the repeated cast is harmless).
reviews_column = df['Reviews']
already_int64 = reviews_column.dtype == 'int64'
if not already_int64:
    df['Reviews'] = reviews_column.astype(int)

#### Viewing column types

In [220]:
# Print each column's dtype to verify that 'Reviews' is now an integer column.
print(df.dtypes)

Condition        object
Drug             object
Indication       object
Type             object
Reviews           int32
Effective       float64
EaseOfUse       float64
Satisfaction    float64
Information      object
dtype: object
