In [1]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; files then appear below the mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [26]:
import pandas as pd

# Compliance export stored on the mounted Google Drive.
file_path = '/content/drive/My Drive/IBM_GRM_Project/compliance_data.xls'

# No sheet is named, so pandas reads its default (first) sheet.
df = pd.read_excel(io=file_path)


In [3]:
# Position of the last column to keep; everything to the right of
# 'Completion Date' is discarded.
end_col = df.columns.get_loc('Completion Date')

# Keep the leading columns, drop rows without a completion date, and take an
# explicit copy. Later cells assign new columns on df2; without .copy() pandas
# may treat df2 as derived from a slice of df and emit SettingWithCopyWarning.
df2 = (
    df.iloc[:, :end_col + 1]
    .dropna(subset=['Completion Date'])
    .copy()
)

In [5]:
df2.columns

Index(['Learning activity - Title', 'Learning activity - ID',
       'Learning activity - Duration', 'Delivery Type', 'Learning Source Name',
       'Learner - Name', 'Learner - Email', 'Learner - ID',
       'Transcript status', 'Completion Date'],
      dtype='object')

In [6]:
# Number of distinct activities completed by one learner on one calendar day
# at or above which that day's rows go to df_more_than_threshold.
THRESHOLD = 5

# Parse the completion timestamps; values that cannot be parsed become NaT.
parsed_completion = pd.to_datetime(df2['Completion Date'], errors='coerce')
df2['Completion Date'] = parsed_completion
df2['Completion Day'] = parsed_completion.dt.date

# Distinct activities per (learner, day).
learner_day_keys = ['Learner - ID', 'Completion Day']
daily_activity_ids = df2.groupby(learner_day_keys)['Learning activity - ID']
course_counts = daily_activity_ids.nunique().reset_index(name='courses_completed')

# Attach the per-day count to every row. Rows with a missing learner ID or
# completion day receive no count (groupby skips missing keys by default) and
# so drop out of this default inner merge: they end up in neither frame below.
df_merged = df2.merge(course_counts, on=learner_day_keys)

# Split on the threshold, then remove the two bookkeeping columns.
completed_per_day = df_merged['courses_completed']
bookkeeping_columns = ['courses_completed', 'Completion Day']
df_less_than_threshold = df_merged[completed_per_day < THRESHOLD].drop(columns=bookkeeping_columns)
df_more_than_threshold = df_merged[completed_per_day >= THRESHOLD].drop(columns=bookkeeping_columns)

## Task 2: Flag students who use disposable email addresses

### Method 1: Manual insertions

In [13]:
# Work on an independent copy. A plain `df3 = df2` only creates a second name
# for the same object, so the columns added to df3 in the cells below would
# silently appear on df2 as well.
df3 = df2.copy()
df3

Unnamed: 0,Learning activity - Title,Learning activity - ID,Learning activity - Duration,Delivery Type,Learning Source Name,Learner - Name,Learner - Email,Learner - ID,Transcript status,Completion Date,Completion Day
0,Introduction to Artificial Intelligence,MDL-211,75.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-16 11:34:07+00:00,2024-08-16
1,"Climate, Biodiversity, and Human Society (brou...",MDL-285,180.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 13:03:52+00:00,2024-08-28
2,Beyond Conservation to Sustainability,MDL-288,60.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 13:45:44+00:00,2024-08-28
3,Make an Impact with Data Analytics,MDL-289,90.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 14:00:09+00:00,2024-08-28
4,Make an Impact with AI,MDL-290,90.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 14:24:03+00:00,2024-08-28
...,...,...,...,...,...,...,...,...,...,...,...
16367,Create a Credly account,URL-0E39749E2965,6.0,Guidance,Your Learning Builder - Activities,Mamidimada Srikanth,,3671287REG,Completed,NaT,NaT
16368,Earn it! Accept it! Share it! | IBM SkillsBuild,URL-CC7432BB7A8A,2.0,Video,Your Learning Builder - Activities,Mamidimada Srikanth,,3671287REG,Completed,NaT,NaT
16379,Introduction to Artificial Intelligence,MDL-211,75.0,eLearning,Moodle,Ekalavya sunvith Chichadi,,3671308REG,Completed,2024-11-30 05:31:11+00:00,2024-11-30
16380,Mastering the Art of Prompting,MDL-298,60.0,eLearning,Moodle,Ekalavya sunvith Chichadi,,3671308REG,Completed,2024-11-30 05:38:16+00:00,2024-11-30


In [14]:
import random

# Sample real and disposable domains used to fabricate test addresses.
genuine_domains = ['gmail.com', 'yahoo.com', 'outlook.com', 'hotmail.com', 'icloud.com']
disposable_domains = ['mailinator.com', '10minutemail.com', 'tempmail.com', 'guerrillamail.com', 'trashmail.com']

# Snapshot the combined pool once. The name `disposable_domains` is rebound to
# a set by the blocklist-download cell later in this notebook; reading the
# global at call time would then fail with `list + set` (TypeError).
DUMMY_EMAIL_DOMAINS = tuple(genuine_domains + disposable_domains)

# Dedicated, seeded generator: re-running this cell reproduces the same dummy
# addresses and leaves the global `random` state untouched.
DUMMY_EMAIL_SEED = 42
_dummy_email_rng = random.Random(DUMMY_EMAIL_SEED)


def generate_dummy_email(name, rng=None):
    """Build a fake address ``first.last@domain`` for a learner name.

    The local part is the lower-cased name with spaces replaced by dots; the
    domain is drawn at random from ``DUMMY_EMAIL_DOMAINS``.

    Args:
        name: Learner name. Anything that is not a ``str`` (for example a NaN
            from a blank cell) yields ``None`` instead of raising.
        rng: Optional object exposing ``choice(sequence)`` (for example a
            ``random.Random`` instance). Defaults to the seeded generator
            created alongside this function.

    Returns:
        The generated address as a string, or ``None`` when ``name`` is not a
        string.
    """
    if not isinstance(name, str):
        return None
    chooser = _dummy_email_rng if rng is None else rng
    domain = chooser.choice(DUMMY_EMAIL_DOMAINS)
    username = name.lower().replace(" ", ".")
    return f"{username}@{domain}"

# One generated address per row, derived from that row's learner name.
learner_names = df3['Learner - Name']
df3['DummyEmails'] = learner_names.apply(generate_dummy_email)


In [15]:
df3['DummyEmails']

Unnamed: 0,DummyEmails
0,adit.dhiman@gmail.com
1,adit.dhiman@icloud.com
2,adit.dhiman@tempmail.com
3,adit.dhiman@gmail.com
4,adit.dhiman@10minutemail.com
...,...
16367,mamidimada.srikanth@outlook.com
16368,mamidimada.srikanth@trashmail.com
16379,ekalavya.sunvith.chichadi@guerrillamail.com
16380,ekalavya.sunvith.chichadi@guerrillamail.com


In [16]:
# Allow-list of mainstream mailbox providers (can be expanded).
common_domains = {'gmail.com', 'yahoo.com', 'outlook.com', 'hotmail.com', 'icloud.com'}


def is_disposable(email):
    """Flag an address whose domain is not on the ``common_domains`` allow-list.

    This is an allow-list heuristic: every domain that is not listed is
    reported as disposable, including legitimate corporate or university
    domains.

    Args:
        email: Address to inspect. Surrounding whitespace is ignored and the
            domain comparison is case-insensitive.

    Returns:
        ``False`` when the domain is in ``common_domains``; ``True`` otherwise,
        including for malformed input (not a string, no ``@``, empty domain).
    """
    if not isinstance(email, str):
        return True  # Missing / non-text values are treated as malformed
    # The domain is whatever follows the LAST '@'. Taking the segment after
    # the first '@' would let "x@gmail.com@mailinator.com" pass as gmail.com.
    _, separator, domain = email.strip().rpartition('@')
    if not separator or not domain:
        return True  # Consider malformed emails as disposable
    return domain.lower() not in common_domains

# Run the allow-list check on every generated address.
dummy_addresses = df3['DummyEmails']
df3['IsDisposable'] = dummy_addresses.apply(is_disposable)


In [17]:
# Partition the rows by the Method 1 flag.
method1_flag = df3['IsDisposable']
disposable_emails_df = df3.loc[method1_flag]
genuine_emails_df = df3.loc[~method1_flag]


In [18]:
disposable_emails_df

Unnamed: 0,Learning activity - Title,Learning activity - ID,Learning activity - Duration,Delivery Type,Learning Source Name,Learner - Name,Learner - Email,Learner - ID,Transcript status,Completion Date,Completion Day,DummyEmails,IsDisposable
2,Beyond Conservation to Sustainability,MDL-288,60.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 13:45:44+00:00,2024-08-28,adit.dhiman@tempmail.com,True
4,Make an Impact with AI,MDL-290,90.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 14:24:03+00:00,2024-08-28,adit.dhiman@10minutemail.com,True
8,Large Language Model Basics,MDL-433,30.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-16 11:45:14+00:00,2024-08-16,adit.dhiman@trashmail.com,True
10,Getting Started with Artificial Intelligence,PLAN-E624C2604060,0.0,Learning Plan,Your Learning Builder - Plans,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,NaT,NaT,adit.dhiman@trashmail.com,True
16,Create a Credly account,URL-0E39749E2965,20.0,Guidance,Your Learning Builder - Activities,Harsh Kumar,haxxxx@gmail.com,3088744REG,Completed,NaT,NaT,harsh.kumar@tempmail.com,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16366,Getting Started with Artificial Intelligence,PLAN-E624C2604060,0.0,Learning Plan,Your Learning Builder - Plans,Mamidimada Srikanth,,3671287REG,Completed,NaT,NaT,mamidimada.srikanth@10minutemail.com,True
16368,Earn it! Accept it! Share it! | IBM SkillsBuild,URL-CC7432BB7A8A,2.0,Video,Your Learning Builder - Activities,Mamidimada Srikanth,,3671287REG,Completed,NaT,NaT,mamidimada.srikanth@trashmail.com,True
16379,Introduction to Artificial Intelligence,MDL-211,75.0,eLearning,Moodle,Ekalavya sunvith Chichadi,,3671308REG,Completed,2024-11-30 05:31:11+00:00,2024-11-30,ekalavya.sunvith.chichadi@guerrillamail.com,True
16380,Mastering the Art of Prompting,MDL-298,60.0,eLearning,Moodle,Ekalavya sunvith Chichadi,,3671308REG,Completed,2024-11-30 05:38:16+00:00,2024-11-30,ekalavya.sunvith.chichadi@guerrillamail.com,True


In [19]:
genuine_emails_df

Unnamed: 0,Learning activity - Title,Learning activity - ID,Learning activity - Duration,Delivery Type,Learning Source Name,Learner - Name,Learner - Email,Learner - ID,Transcript status,Completion Date,Completion Day,DummyEmails,IsDisposable
0,Introduction to Artificial Intelligence,MDL-211,75.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-16 11:34:07+00:00,2024-08-16,adit.dhiman@gmail.com,False
1,"Climate, Biodiversity, and Human Society (brou...",MDL-285,180.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 13:03:52+00:00,2024-08-28,adit.dhiman@icloud.com,False
3,Make an Impact with Data Analytics,MDL-289,90.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 14:00:09+00:00,2024-08-28,adit.dhiman@gmail.com,False
5,Make an Impact with Hybrid Cloud,MDL-291,90.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 14:49:51+00:00,2024-08-28,adit.dhiman@hotmail.com,False
6,Make an Impact with Future Technology,MDL-292,60.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 15:01:14+00:00,2024-08-28,adit.dhiman@outlook.com,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16356,Create a Credly account,URL-0E39749E2965,5.0,Guidance,Your Learning Builder - Activities,Vanama mansi Chowdary,,3671281REG,Completed,NaT,NaT,vanama.mansi.chowdary@hotmail.com,False
16361,Introduction to Artificial Intelligence,MDL-211,75.0,eLearning,Moodle,Mamidimada Srikanth,,3671287REG,Completed,2024-12-08 12:22:44+00:00,2024-12-08,mamidimada.srikanth@gmail.com,False
16362,Mastering the Art of Prompting,MDL-298,60.0,eLearning,Moodle,Mamidimada Srikanth,,3671287REG,Completed,2024-12-17 05:37:31+00:00,2024-12-17,mamidimada.srikanth@icloud.com,False
16364,Build Your First Chatbot,MDL-510,60.0,eLearning,Moodle,Mamidimada Srikanth,,3671287REG,Completed,2024-11-29 18:15:36+00:00,2024-11-29,mamidimada.srikanth@gmail.com,False


### Method 2: Using Disposable Domain List

GitHub repo: https://github.com/disposable-email-domains/disposable-email-domains

Disposable List: https://github.com/disposable-email-domains/disposable-email-domains/blob/main/disposable_email_blocklist.conf

In [20]:
import requests

url = "https://raw.githubusercontent.com/disposable-email-domains/disposable-email-domains/main/disposable_email_blocklist.conf"

# Bound the wait and fail loudly on an HTTP error; otherwise the body of an
# error page would be parsed below as if it were a list of domains.
response = requests.get(url, timeout=30)
response.raise_for_status()

# One domain per line. Strip first so that blank lines and '#' comments are
# recognised even when they carry leading whitespace.
# NOTE: this rebinds `disposable_domains` (a short list in the Method 1 cell)
# to the full blocklist as a set.
disposable_domains = {
    entry.lower()
    for entry in (raw_line.strip() for raw_line in response.text.splitlines())
    if entry and not entry.startswith('#')
}


In [21]:
def is_disposable_email(email, blocklist=None):
    """Report whether an address's domain appears on a disposable-domain blocklist.

    Args:
        email: Address to inspect. Surrounding whitespace is ignored and the
            domain comparison is case-insensitive.
        blocklist: Optional container of lower-case domains to test against.
            When omitted, the notebook-level ``disposable_domains`` is used.

    Returns:
        ``True`` when the domain is in the blocklist; ``False`` otherwise,
        including for malformed input (not a string, no ``@``).
    """
    if blocklist is None:
        blocklist = disposable_domains
    if not isinstance(email, str):
        return False  # Safe fallback for missing / non-text values
    # The domain is whatever follows the LAST '@'. Taking the segment after
    # the first '@' would miss "x@gmail.com@mailinator.com".
    _, separator, domain = email.strip().rpartition('@')
    if not separator:
        return False  # Safe fallback: no '@' at all
    return domain.lower() in blocklist


In [22]:
# Blocklist lookup for every generated address.
addresses_to_check = df3['DummyEmails']
df3['IsDisposableNew'] = addresses_to_check.apply(is_disposable_email)

In [23]:
# Partition the rows by the Method 2 (blocklist) flag.
blocklist_flag = df3['IsDisposableNew']
disposable_df_new = df3[blocklist_flag.eq(True)]
genuine_df_new = df3[blocklist_flag.eq(False)]

In [24]:
disposable_df_new

Unnamed: 0,Learning activity - Title,Learning activity - ID,Learning activity - Duration,Delivery Type,Learning Source Name,Learner - Name,Learner - Email,Learner - ID,Transcript status,Completion Date,Completion Day,DummyEmails,IsDisposable,IsDisposableNew
4,Make an Impact with AI,MDL-290,90.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 14:24:03+00:00,2024-08-28,adit.dhiman@10minutemail.com,True,True
8,Large Language Model Basics,MDL-433,30.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-16 11:45:14+00:00,2024-08-16,adit.dhiman@trashmail.com,True,True
10,Getting Started with Artificial Intelligence,PLAN-E624C2604060,0.0,Learning Plan,Your Learning Builder - Plans,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,NaT,NaT,adit.dhiman@trashmail.com,True,True
19,Mastering the Art of Prompting,MDL-298,60.0,eLearning,Moodle,Ayesha Maryam,ayxxxx@gmail.com,3088998REG,Completed,2024-08-16 13:18:10+00:00,2024-08-16,ayesha.maryam@10minutemail.com,True,True
25,Earn it! Accept it! Share it! | IBM SkillsBuild,URL-CC7432BB7A8A,8.0,Video,Your Learning Builder - Activities,Ayesha Maryam,ayxxxx@gmail.com,3088998REG,Completed,NaT,NaT,ayesha.maryam@guerrillamail.com,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16366,Getting Started with Artificial Intelligence,PLAN-E624C2604060,0.0,Learning Plan,Your Learning Builder - Plans,Mamidimada Srikanth,,3671287REG,Completed,NaT,NaT,mamidimada.srikanth@10minutemail.com,True,True
16368,Earn it! Accept it! Share it! | IBM SkillsBuild,URL-CC7432BB7A8A,2.0,Video,Your Learning Builder - Activities,Mamidimada Srikanth,,3671287REG,Completed,NaT,NaT,mamidimada.srikanth@trashmail.com,True,True
16379,Introduction to Artificial Intelligence,MDL-211,75.0,eLearning,Moodle,Ekalavya sunvith Chichadi,,3671308REG,Completed,2024-11-30 05:31:11+00:00,2024-11-30,ekalavya.sunvith.chichadi@guerrillamail.com,True,True
16380,Mastering the Art of Prompting,MDL-298,60.0,eLearning,Moodle,Ekalavya sunvith Chichadi,,3671308REG,Completed,2024-11-30 05:38:16+00:00,2024-11-30,ekalavya.sunvith.chichadi@guerrillamail.com,True,True


In [25]:
genuine_df_new

Unnamed: 0,Learning activity - Title,Learning activity - ID,Learning activity - Duration,Delivery Type,Learning Source Name,Learner - Name,Learner - Email,Learner - ID,Transcript status,Completion Date,Completion Day,DummyEmails,IsDisposable,IsDisposableNew
0,Introduction to Artificial Intelligence,MDL-211,75.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-16 11:34:07+00:00,2024-08-16,adit.dhiman@gmail.com,False,False
1,"Climate, Biodiversity, and Human Society (brou...",MDL-285,180.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 13:03:52+00:00,2024-08-28,adit.dhiman@icloud.com,False,False
2,Beyond Conservation to Sustainability,MDL-288,60.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 13:45:44+00:00,2024-08-28,adit.dhiman@tempmail.com,True,False
3,Make an Impact with Data Analytics,MDL-289,90.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 14:00:09+00:00,2024-08-28,adit.dhiman@gmail.com,False,False
5,Make an Impact with Hybrid Cloud,MDL-291,90.0,eLearning,Moodle,Adit Dhiman,adxxx@gmail.com,3088627REG,Completed,2024-08-28 14:49:51+00:00,2024-08-28,adit.dhiman@hotmail.com,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16356,Create a Credly account,URL-0E39749E2965,5.0,Guidance,Your Learning Builder - Activities,Vanama mansi Chowdary,,3671281REG,Completed,NaT,NaT,vanama.mansi.chowdary@hotmail.com,False,False
16361,Introduction to Artificial Intelligence,MDL-211,75.0,eLearning,Moodle,Mamidimada Srikanth,,3671287REG,Completed,2024-12-08 12:22:44+00:00,2024-12-08,mamidimada.srikanth@gmail.com,False,False
16362,Mastering the Art of Prompting,MDL-298,60.0,eLearning,Moodle,Mamidimada Srikanth,,3671287REG,Completed,2024-12-17 05:37:31+00:00,2024-12-17,mamidimada.srikanth@icloud.com,False,False
16364,Build Your First Chatbot,MDL-510,60.0,eLearning,Moodle,Mamidimada Srikanth,,3671287REG,Completed,2024-11-29 18:15:36+00:00,2024-11-29,mamidimada.srikanth@gmail.com,False,False


### Method 3: Using Kickbox API

In [None]:
import requests

# Verdicts already obtained from Kickbox, keyed by lower-cased domain, so that
# each distinct domain costs at most one HTTP request.
kickbox_cache = {}


def check_kickbox_disposable(email, timeout=10):
    """Ask the Kickbox open API whether an address's domain is disposable.

    Only the domain part is sent: the endpoint accepts a bare domain as well
    as a full address, which keeps the local part private and lets every row
    that shares a domain reuse one cached answer.

    Args:
        email: Address (or bare domain) to check.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``True`` if Kickbox reports the domain as disposable, ``False`` if it
        does not, or ``None`` when no verdict could be obtained (non-text or
        empty input, network failure, HTTP error, unparsable response).
    """
    from urllib.parse import quote

    if not isinstance(email, str):
        return None
    domain = email.strip().rpartition('@')[2].lower()
    if not domain:
        return None
    if domain in kickbox_cache:
        return kickbox_cache[domain]
    try:
        # Percent-encode so characters such as '/', '?' or '#' cannot alter the path.
        url = f"https://open.kickbox.com/v1/disposable/{quote(domain, safe='')}"
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # an error body must not be read as "not disposable"
        verdict = response.json().get("disposable", False)  # True = disposable email
    except Exception as e:
        # Best-effort lookup: report the problem and leave the verdict unknown.
        print(f"Error checking email {email}: {e}")
        return None
    kickbox_cache[domain] = verdict  # failures are not cached, so they can be retried
    return verdict


In [None]:
# Query the API for every generated address (one call per row).
addresses_for_api = df3['DummyEmails']
df3['IsDisposableAPI'] = addresses_for_api.apply(check_kickbox_disposable)

In [None]:
# Partition the rows by the API verdict. Rows whose lookup failed (verdict
# None) compare unequal to both True and False and appear in neither frame.
api_verdict = df3['IsDisposableAPI']
disposable_df_API = df3[api_verdict.eq(True)]
genuine_df_API = df3[api_verdict.eq(False)]


In [None]:
disposable_df_API

In [None]:
genuine_df_API