In [39]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Defining sample data for the columns.
# Each range/list below is a pool that generate_sample_data() draws from with
# random.choice(). The pools are sampled independently of one another, so a
# generated row can pair values that do not belong together (for example the
# form 'Tablet' with the dose '1 Injection').
patient_ids = range(1, 101)  # 100 unique patient IDs (1-100)
opd_ids = range(1, 21)  # 20 unique OPD IDs (1-20); the only visible use of this pool, in generate_sample_data(), is commented out
doctor_ids = range(1, 11)  # 10 unique doctor IDs (1-10)
medicine_names = ['Paracetamol', 'Ibuprofen', 'Amoxicillin', 'Atorvastatin', 'Omeprazole']
medicine_forms = ['Tablet', 'Capsule', 'Syrup', 'Injection']
medicine_strengths = ['500 mg', '200 mg', '250 mg', '40 mg', '20 mg']
medicine_doses = ['1 Tablet', '2 Capsules', '5 ml', '1 Injection']
medicine_routes = ['Oral', 'Intravenous', 'Intramuscular']
# Free-text frequencies; convert_frequency() turns these into times per day.
medicine_frequencies = ['Once a day', 'Twice a day', 'Thrice a day', 'Every 6 hours']
# Free-text durations; convert_duration() turns these into a number of days.
medicine_durations = ['1 day', '3 days', '5 days', '1 week', '10 days', '2 weeks', '1 month', '3 months', '6 months']
medicine_instructions = ['Take with food', 'Take on empty stomach', 'Avoid alcohol', 'Take before bed']

# Generating random sample data
def generate_sample_data(num_samples):
    """Build ``num_samples`` synthetic prescription rows.

    Every row is a list of 12 values, in this order: patient id, visit date
    (``'%Y-%m-%d'`` string), OPD id, doctor id, medicine name, form, strength,
    dose, route, frequency, duration and instruction.

    The OPD id is simply the 0-based row number, so it is unique per row.
    All other fields are drawn at random from the module-level pools; the
    date is today's date minus a random 0-365 days.

    Args:
        num_samples: Number of rows to generate.

    Returns:
        A list of ``num_samples`` rows (each a list of 12 values).
    """
    # Pools for the eight medicine fields, listed in output-column order.
    # They are sampled in exactly this order for every row.
    medicine_pools = (
        medicine_names,
        medicine_forms,
        medicine_strengths,
        medicine_doses,
        medicine_routes,
        medicine_frequencies,
        medicine_durations,
        medicine_instructions,
    )

    rows = []
    for row_number in range(num_samples):
        patient = random.choice(patient_ids)
        visit_day = datetime.now() - timedelta(days=random.randint(0, 365))
        doctor = random.choice(doctor_ids)
        medicine_fields = [random.choice(pool) for pool in medicine_pools]

        rows.append(
            [patient, visit_day.strftime('%Y-%m-%d'), row_number, doctor]
            + medicine_fields
        )

    return rows

# Seed the RNG so that the randomly drawn fields are the same on every
# "Restart & Run All". The Date column is still computed relative to
# datetime.now(), so its values shift with the day the cell is executed.
random.seed(42)

# Generate 1000 sample rows
sample_data = generate_sample_data(1000)

# Create DataFrame
# NOTE: 'medicine_duration' is lower-case unlike the other column names; a
# later cell looks the column up by this exact spelling, so it is kept as is.
columns = ['Patient_Id', 'Date', 'OPD_Id', 'Doctor_Id', 'Medicine_Name', 'Medicine_Form', 'Medicine_Strength', 'Medicine_Dose', 'Medicine_Route', 'Medicine_Frequency', 'medicine_duration', 'Medicine_Instruction']
df = pd.DataFrame(sample_data, columns=columns)
df.head()


Unnamed: 0,Patient_Id,Date,OPD_Id,Doctor_Id,Medicine_Name,Medicine_Form,Medicine_Strength,Medicine_Dose,Medicine_Route,Medicine_Frequency,medicine_duration,Medicine_Instruction
0,24,2023-12-06,0,5,Amoxicillin,Tablet,250 mg,2 Capsules,Oral,Once a day,2 weeks,Take on empty stomach
1,40,2023-09-23,1,4,Amoxicillin,Tablet,40 mg,1 Injection,Intramuscular,Twice a day,3 months,Take before bed
2,55,2024-05-03,2,4,Omeprazole,Capsule,250 mg,1 Injection,Intramuscular,Thrice a day,1 week,Take with food
3,11,2023-11-09,3,7,Amoxicillin,Capsule,250 mg,1 Injection,Intravenous,Thrice a day,1 month,Avoid alcohol
4,89,2024-02-10,4,4,Atorvastatin,Tablet,20 mg,2 Capsules,Intravenous,Every 6 hours,1 month,Avoid alcohol


In [40]:
# prompt: Convert the Medicine_Frequency column into numerical column and add additional column to indicate unit of frequency

def convert_frequency(frequency):
    """Convert a free-text dosing frequency into a number of times per day.

    Recognised forms (matching is case-insensitive):

    * ``"Once/Twice/Thrice a day"``      -> 1 / 2 / 3
    * ``"<N> times a day"``              -> N (digits, or the words one..six)
    * ``"Every <N> hours"``              -> ``round(24 / N, 2)``

    Anything else -- including non-string values such as NaN/None, an hourly
    phrase without a positive number, or an unrecognised daily phrase --
    returns 0, the "unknown" sentinel. Unrecognised daily phrases are no
    longer reported as 4 times a day.

    Args:
        frequency: The frequency text, e.g. ``"Twice a day"``.

    Returns:
        int or float: administrations per day, or 0 when it cannot be derived.
    """
    # Missing values (NaN/None) reach this function through Series.apply.
    if not isinstance(frequency, str):
        return 0

    adverb_counts = (("once", 1), ("twice", 2), ("thrice", 3))
    number_words = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5, "six": 6}

    text = frequency.lower()
    tokens = text.split()
    # isdecimal() (rather than isdigit()) so that tokens such as '²' are
    # skipped instead of making int() raise ValueError.
    numbers = [int(tok) for tok in tokens if tok.isdecimal()]

    if "day" in text:
        for adverb, count in adverb_counts:
            if adverb in text:
                return count
        # "<N> times a day": only trust a number when the phrase talks about
        # "times", so "Every 2 days" is not misread as twice a day.
        if "time" in text:
            if numbers:
                return numbers[0]
            for tok in tokens:
                if tok in number_words:
                    return number_words[tok]

    # Interval form. Also reached by phrases such as "Every 6 hours for 3 days"
    # that mention days but carry no per-day count.
    if "hour" in text and numbers and numbers[0] > 0:
        return round(24 / numbers[0], 2)

    return 0

# Element-wise conversion of the free-text frequency into a numeric
# times-per-day column, stored next to the original text column.
df["Frequency_per_day"] = df["Medicine_Frequency"].map(convert_frequency)
df.head()


Unnamed: 0,Patient_Id,Date,OPD_Id,Doctor_Id,Medicine_Name,Medicine_Form,Medicine_Strength,Medicine_Dose,Medicine_Route,Medicine_Frequency,medicine_duration,Medicine_Instruction,Frequency_per_day
0,24,2023-12-06,0,5,Amoxicillin,Tablet,250 mg,2 Capsules,Oral,Once a day,2 weeks,Take on empty stomach,1.0
1,40,2023-09-23,1,4,Amoxicillin,Tablet,40 mg,1 Injection,Intramuscular,Twice a day,3 months,Take before bed,2.0
2,55,2024-05-03,2,4,Omeprazole,Capsule,250 mg,1 Injection,Intramuscular,Thrice a day,1 week,Take with food,3.0
3,11,2023-11-09,3,7,Amoxicillin,Capsule,250 mg,1 Injection,Intravenous,Thrice a day,1 month,Avoid alcohol,3.0
4,89,2024-02-10,4,4,Atorvastatin,Tablet,20 mg,2 Capsules,Intravenous,Every 6 hours,1 month,Avoid alcohol,4.0


In [41]:
# prompt: convert the medicine_duration column into days column

def convert_duration(duration):
    """Convert a free-text treatment duration into a number of days.

    The first whole number in the text is multiplied by the length of the
    first matching unit, checked in the order day, week, month (a month is
    counted as 30 days). Matching is case-insensitive.

    Returns 0 when the value is not a string (e.g. NaN/None), contains no
    whole number, or names no recognised unit.

    Args:
        duration: The duration text, e.g. ``"2 weeks"``.

    Returns:
        int: The duration in days, or 0 when it cannot be derived.
    """
    # Missing values (NaN/None) reach this function through Series.apply.
    if not isinstance(duration, str):
        return 0

    text = duration.lower()
    # isdecimal() (rather than isdigit()) so that tokens such as '²' are
    # skipped instead of making int() raise ValueError.
    numbers = [int(tok) for tok in text.split() if tok.isdecimal()]
    if not numbers:
        # e.g. "a week": there is a unit but no count to multiply.
        return 0

    count = numbers[0]
    days_per_unit = (("day", 1), ("week", 7), ("month", 30))
    for unit, unit_days in days_per_unit:
        if unit in text:
            return count * unit_days

    return 0

# Duration in days, derived element-wise from the free-text duration column.
df["Duration_Days"] = df["medicine_duration"].map(convert_duration)
# Total number of administrations over the course: times per day x days.
df["Total_Qnty"] = df["Frequency_per_day"] * df["Duration_Days"]
df.head()


Unnamed: 0,Patient_Id,Date,OPD_Id,Doctor_Id,Medicine_Name,Medicine_Form,Medicine_Strength,Medicine_Dose,Medicine_Route,Medicine_Frequency,medicine_duration,Medicine_Instruction,Frequency_per_day,Duration_Days,Total_Qnty
0,24,2023-12-06,0,5,Amoxicillin,Tablet,250 mg,2 Capsules,Oral,Once a day,2 weeks,Take on empty stomach,1.0,14,14.0
1,40,2023-09-23,1,4,Amoxicillin,Tablet,40 mg,1 Injection,Intramuscular,Twice a day,3 months,Take before bed,2.0,90,180.0
2,55,2024-05-03,2,4,Omeprazole,Capsule,250 mg,1 Injection,Intramuscular,Thrice a day,1 week,Take with food,3.0,7,21.0
3,11,2023-11-09,3,7,Amoxicillin,Capsule,250 mg,1 Injection,Intravenous,Thrice a day,1 month,Avoid alcohol,3.0,30,90.0
4,89,2024-02-10,4,4,Atorvastatin,Tablet,20 mg,2 Capsules,Intravenous,Every 6 hours,1 month,Avoid alcohol,4.0,30,120.0


In [42]:
# Dimension check: 1000 generated rows; 12 generated columns plus the 3
# derived ones (Frequency_per_day, Duration_Days, Total_Qnty).
df.shape

(1000, 15)

In [43]:
# prompt: Save the dataframe to D drive of the machine from google colab
# NOTE: despite the prompt wording, this cell mounts Google Drive at
# /content/drive and writes the CSV into its "MyDrive" folder; nothing is
# written to a local D: drive.

from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

# index=False keeps the DataFrame's row index out of the CSV file.
csv_path = '/content/drive/MyDrive/medicine_data.csv'
df.to_csv(csv_path, index=False)


Mounted at /content/drive
