#### Importing necessary libraries

In [34]:
import pandas as pd
import os

#### Importing data

In [35]:
# Specify the relative path to the starting CSV file from the current working directory
relative_path = os.path.join('..', 'starting_file', 'drug_starting.csv')

# Construct the absolute path
absolute_path = os.path.abspath(relative_path)

# Read the CSV file into a DataFrame
df = pd.read_csv(absolute_path)

#### Viewing DataFrame

In [36]:
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,\r\n\t\t\t\t\tLevofloxacin is used to treat a ...
1,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,\r\n\t\t\t\t\tLevofloxacin is used to treat a ...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755 Reviews,2.78,3.00,2.08,\r\n\t\t\t\t\t This is a generic drug. The ave...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,\r\n\t\t\t\t\tAzithromycin is an antibiotic (m...
4,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,\r\n\t\t\t\t\tAzithromycin is an antibiotic (m...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tClotrimazole is used to treat sk...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tThis medication is used to treat...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1 Reviews,5.00,4.00,5.00,\r\n\t\t\t\t\tClotrimazole is used to treat sk...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tThis medication is used to treat...


#### Viewing DataFrame shape

In [37]:
df.shape

(2219, 9)

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

#### Data cleaning and preparing for analysis

#### Checking for null values

In [38]:
null_counts = df.isnull().sum()

print(null_counts)

Condition       0
Drug            0
Indication      0
Type            0
Reviews         0
Effective       0
EaseOfUse       0
Satisfaction    0
Information     0
dtype: int64


#### Viewing column names

In [39]:
df.columns

Index(['Condition', 'Drug', 'Indication', 'Type', 'Reviews', 'Effective',
       'EaseOfUse', 'Satisfaction', 'Information'],
      dtype='object')

#### Removing double quotes from  specific columns

In [40]:
# Columns to remove double quotes from
columns_to_clean = ['Condition', 'Drug', 'Indication', 'Type', 'Reviews']

# Remove double quotes from specified columns
for col in columns_to_clean:
    df[col] = df[col].str.replace('"', '')

#### Viewing cleaned DataFrame

In [41]:
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,\r\n\t\t\t\t\tLevofloxacin is used to treat a ...
1,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,\r\n\t\t\t\t\tLevofloxacin is used to treat a ...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755 Reviews,2.78,3.00,2.08,\r\n\t\t\t\t\t This is a generic drug. The ave...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,\r\n\t\t\t\t\tAzithromycin is an antibiotic (m...
4,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,\r\n\t\t\t\t\tAzithromycin is an antibiotic (m...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tClotrimazole is used to treat sk...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tThis medication is used to treat...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1 Reviews,5.00,4.00,5.00,\r\n\t\t\t\t\tClotrimazole is used to treat sk...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1 Reviews,5.00,5.00,5.00,\r\n\t\t\t\t\tThis medication is used to treat...


#### Removing \r, \n, and \t characters from the 'Information' column

In [42]:
df['Information'] = df['Information'].replace({'\r': '', '\n': '', '\t': ''}, regex=True)

In [43]:
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
1,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994 Reviews,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755 Reviews,2.78,3.00,2.08,This is a generic drug. The average cash pric...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
4,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584 Reviews,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2 Reviews,5.00,5.00,5.00,Clotrimazole is used to treat skin infections ...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1 Reviews,5.00,5.00,5.00,This medication is used to treat vaginal yeast...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1 Reviews,5.00,4.00,5.00,Clotrimazole is used to treat skin infections ...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1 Reviews,5.00,5.00,5.00,This medication is used to treat vaginal yeast...


#### Removing non-numeric characters from the 'Reviews' column

In [44]:
df['Reviews'] = df['Reviews'].str.replace(r'\D+', '')

#### Removing the word "Reviews" from the 'Reviews' column

In [45]:
df['Reviews'] = df['Reviews'].str.replace('Reviews', '')

In [46]:
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
1,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755,2.78,3.00,2.08,This is a generic drug. The average cash pric...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
4,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2,5.00,5.00,5.00,Clotrimazole is used to treat skin infections ...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1,5.00,4.00,5.00,Clotrimazole is used to treat skin infections ...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...


#### Converting Reviews column to int

In [47]:
if df['Reviews'].dtype != 'int64':
    df['Reviews'] = df['Reviews'].astype(int)

#### Viewing column types

In [48]:
print(df.dtypes)

Condition        object
Drug             object
Indication       object
Type             object
Reviews           int32
Effective       float64
EaseOfUse       float64
Satisfaction    float64
Information      object
dtype: object


#### Checking for duplicates

In [49]:
duplicates = df.duplicated()

# Showing rows with duplicates

In [50]:
duplicate_rows = df[duplicates]
print("Duplicate rows:")
print(duplicate_rows)

Duplicate rows:
                      Condition                 Drug Indication Type  Reviews  \
1     Acute Bacterial Sinusitis         Levofloxacin   On Label   RX      994   
4     Acute Bacterial Sinusitis         Azithromycin   On Label   RX      584   
5     Acute Bacterial Sinusitis         Azithromycin   On Label   RX      584   
6     Acute Bacterial Sinusitis         Azithromycin   On Label   RX      584   
7     Acute Bacterial Sinusitis         Azithromycin   On Label   RX      584   
...                         ...                  ...        ...  ...      ...   
2187                    vertigo  Diphenhydramine Hcl   On Label  OTC        1   
2188                    vertigo  Diphenhydramine Hcl   On Label  OTC        1   
2189                    vertigo  Diphenhydramine Hcl   On Label  OTC        1   
2193   vulvovaginal candidiasis          Fluconazole   On Label   RX      225   
2195   vulvovaginal candidiasis          Fluconazole   On Label   RX      168   

      Effec

# Removing duplicate rows and overwriting original DataFrame

In [51]:
df = df.drop_duplicates()

# Viewing DataFrame without duplicates

In [52]:
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755,2.78,3.00,2.08,This is a generic drug. The average cash pric...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
8,Acute Bacterial Sinusitis,Amoxicillin-Pot Clavulanate,On Label,RX,437,3.26,3.23,2.42,Amoxicillin/clavulanic acid is a combination p...
11,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,361,2.44,2.96,1.68,Levofloxacin is used to treat a variety of bac...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2,5.00,5.00,5.00,Clotrimazole is used to treat skin infections ...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1,5.00,4.00,5.00,Clotrimazole is used to treat skin infections ...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...


#### Checking for '\r\n' in all columns of a DataFrame

In [53]:
mask = df.apply(lambda row: any(isinstance(cell, str) and '\r\n' in cell for cell in row), axis=1)

# Remove rows containing '\r\n'
df = df[~mask]

#### Viewing DataFrame

In [54]:
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,994,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX,755,2.78,3.00,2.08,This is a generic drug. The average cash pric...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX,584,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
8,Acute Bacterial Sinusitis,Amoxicillin-Pot Clavulanate,On Label,RX,437,3.26,3.23,2.42,Amoxicillin/clavulanic acid is a combination p...
11,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX,361,2.44,2.96,1.68,Levofloxacin is used to treat a variety of bac...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,2,5.00,5.00,5.00,Clotrimazole is used to treat skin infections ...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX,1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC,1,5.00,4.00,5.00,Clotrimazole is used to treat skin infections ...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC,1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...


#### Finding lowest value in 'Reviews' column

In [55]:
lowest_value = df['Reviews'].min()
print(lowest_value)

1


#### Finding highest value in 'Reviews' column

In [56]:
highest_value = df['Reviews'].max()
print(highest_value)

4647


#### Finding lowest value in 'Effective' column

In [57]:
lowest_value = df['Effective'].min()
print(lowest_value)

1.0


#### Finding highest value in 'Effective' column

In [58]:
highest_value = df['Effective'].max()
print(highest_value)

5.0


#### Finding lowest value in 'EaseOfUse' column

In [59]:
lowest_value = df['EaseOfUse'].min()
print(lowest_value)

1.0


#### Finding highest value in 'EaseOfUse' column

In [60]:
highest_value = df['EaseOfUse'].max()
print(highest_value)

5.0


#### Finding lowest value in 'Satisfaction' column

In [61]:
lowest_value = df['Satisfaction'].min()
print(lowest_value)

1.0


#### Finding highest value in 'Satisfaction' column

In [62]:
highest_value = df['Satisfaction'].max()
print(highest_value)

5.0


# Getting all unique values in the 'Type' column

In [63]:
unique_type = df['Type'].unique()
print(unique_type)

['RX' 'OTC' 'RX/OTC']


#### Renaming values in 'Type' column

In [64]:
def rename_type(type_value):
    if type_value == 'RX':
        return 'RX (Prescription)'
    elif type_value == 'OTC':
        return 'OTC (Over-the-counter)'
    elif type_value == 'RX/OTC':
        return 'RX/OTC (Prescription/Over-the-counter)'
    else:
        return type_value  # If the value is not one of the specified ones, return it unchanged

# Apply the function to the 'Type' column using .loc
df.loc[:, 'Type'] = df['Type'].apply(rename_type)

#### Viewing DataFrame

In [65]:
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX (Prescription),994,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
2,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX (Prescription),755,2.78,3.00,2.08,This is a generic drug. The average cash pric...
3,Acute Bacterial Sinusitis,Azithromycin,On Label,RX (Prescription),584,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
8,Acute Bacterial Sinusitis,Amoxicillin-Pot Clavulanate,On Label,RX (Prescription),437,3.26,3.23,2.42,Amoxicillin/clavulanic acid is a combination p...
11,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX (Prescription),361,2.44,2.96,1.68,Levofloxacin is used to treat a variety of bac...
...,...,...,...,...,...,...,...,...,...
2214,vulvovaginal candidiasis,Clotrimazole,On Label,OTC (Over-the-counter),2,5.00,5.00,5.00,Clotrimazole is used to treat skin infections ...
2215,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX (Prescription),1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...
2216,vulvovaginal candidiasis,Clotrimazole,On Label,OTC (Over-the-counter),1,5.00,4.00,5.00,Clotrimazole is used to treat skin infections ...
2217,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC (Over-the-counter),1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...


#### Reseting DataFrame index

In [66]:
df.reset_index(drop=True, inplace=True)

#### Viewing DataFrame

In [67]:
df

Unnamed: 0,Condition,Drug,Indication,Type,Reviews,Effective,EaseOfUse,Satisfaction,Information
0,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX (Prescription),994,2.52,3.01,1.84,Levofloxacin is used to treat a variety of bac...
1,Acute Bacterial Sinusitis,Moxifloxacin,On Label,RX (Prescription),755,2.78,3.00,2.08,This is a generic drug. The average cash pric...
2,Acute Bacterial Sinusitis,Azithromycin,On Label,RX (Prescription),584,3.21,4.01,2.57,Azithromycin is an antibiotic (macrolide-type)...
3,Acute Bacterial Sinusitis,Amoxicillin-Pot Clavulanate,On Label,RX (Prescription),437,3.26,3.23,2.42,Amoxicillin/clavulanic acid is a combination p...
4,Acute Bacterial Sinusitis,Levofloxacin,On Label,RX (Prescription),361,2.44,2.96,1.68,Levofloxacin is used to treat a variety of bac...
...,...,...,...,...,...,...,...,...,...
1713,vulvovaginal candidiasis,Clotrimazole,On Label,OTC (Over-the-counter),2,5.00,5.00,5.00,Clotrimazole is used to treat skin infections ...
1714,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,RX (Prescription),1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...
1715,vulvovaginal candidiasis,Clotrimazole,On Label,OTC (Over-the-counter),1,5.00,4.00,5.00,Clotrimazole is used to treat skin infections ...
1716,vulvovaginal candidiasis,Butoconazole Nitrate,On Label,OTC (Over-the-counter),1,5.00,5.00,5.00,This medication is used to treat vaginal yeast...


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

#### DataFrame exporting

#### Exporting cleaned DataFrame

In [68]:
# Specify the relative path to save the cleaned DataFrame
relative_path_save = os.path.join('..','cleaned_starting_file', 'drugs_cleaned.csv')

# Construct the absolute path to save the cleaned file
absolute_path_save = os.path.abspath(relative_path_save)

# Save the cleaned DataFrame to a CSV file
df.to_csv(absolute_path_save, index=False)