In [1]:
import email
import getpass
import imaplib
import os
import smtplib
import sqlite3
import textwrap
import time

import pandas as pd
from tqdm.notebook import tqdm

# Location of the SQLite results database; passed to sqlite3.connect() in the cells below.
db_path = "/home/gjungwirth/data/02_data/results.db"

In [2]:
# Mail account credentials are prompted for at run time so they never live in the notebook.
USERNAME = input("Username: ")
# getpass keeps the typed password out of the (saved) cell output, unlike input().
PASSWD = getpass.getpass("Password: ")

# Outgoing mail server used by send_email(); the commented line is an alternative host.
#SMTP_SERVER = "mail.student.tuwien.ac.at"  # imap/smtp
SMTP_SERVER = "smtp.eu.mailgun.org"
SMTP_PORT = 587
# An empty IMAP_SERVER makes send_email() skip storing a copy in the "Sent" mailbox.
IMAP_SERVER = ""
#IMAP_SERVER = SMTP_SERVER
IMAP_PORT = 143
# Address written into the From header of every outgoing message.
EMAIL_FROM = ""

In [3]:
def send_email(recpt, subj, body):
    """Send one plain-text email over SMTP and optionally archive it via IMAP.

    Args:
        recpt: Address placed in the ``To`` header.
        subj: Text of the ``Subject`` header.
        body: Plain-text content of the message.

    The message is submitted through ``SMTP_SERVER`` after STARTTLS and a
    login with ``USERNAME``/``PASSWD``. When ``IMAP_SERVER`` is non-empty, the
    serialized message is additionally appended to the ``Sent`` mailbox with
    the ``\\Seen`` flag. Returns ``None``.
    """
    message = email.message.EmailMessage()
    message['Subject'] = subj
    message['From'] = EMAIL_FROM
    message['To'] = recpt
    message.set_content(body)
    # Serialized up front; only used for the optional IMAP copy below.
    serialized = message.as_string()

    with smtplib.SMTP(SMTP_SERVER, port=SMTP_PORT) as smtp:
        smtp.starttls()
        smtp.login(USERNAME, PASSWD)
        smtp.send_message(message)

    # No IMAP server configured: nothing to archive.
    if not IMAP_SERVER:
        return

    with imaplib.IMAP4(IMAP_SERVER, IMAP_PORT) as mailbox:
        mailbox.starttls()
        mailbox.login(USERNAME, PASSWD)
        stamp = imaplib.Time2Internaldate(time.time())
        mailbox.append('Sent', '\\Seen', stamp, serialized.encode('utf8'))




In [4]:
def prepare_template(name, is_credentials, is_rsa, is_privdata, is_dependencies):
    """Build the subject and plain-text body of the disclosure/survey email.

    Args:
        name: Recipient name used in the greeting. Anything that is not a
            non-empty string (``None``, a ``NaN`` from a missing table cell,
            ``""``) produces the anonymous greeting ``"Hello,"``.
        is_credentials: Truthy to include the "Credentials" finding.
        is_rsa: Truthy to include the "RSA Keys" finding.
        is_privdata: Truthy to include the "Private Data" finding.
        is_dependencies: Truthy to include the "Old/Outdated Dependencies"
            finding.

    Returns:
        A ``(subject, body)`` tuple of strings.
    """
    # A missing name must not reach the string concatenation: NaN is truthy,
    # so `" " + name` would raise TypeError, and None would render "HelloNone,".
    greeting_name = " " + name if isinstance(name, str) and name else ""
    subject = "Disclosure and survey about GitHub usage"

    findings = (
        (is_credentials, "* Credentials: Your repository may contain API keys or authentication credentials, which(if valid) could be used to log in to web services in your name."),
        (is_rsa, "* RSA Keys: You may have a private key or weak public RSA key, which could be used to authenticate to some service(e.g. via ssh) in your name."),
        (is_privdata, "* Private Data: Your repository may contain private data, which is typically not shared publicly. This includes, browsing history, cookies, and chat logs."),
        (is_dependencies, "* Old/Outdated Dependencies: Your repository may contain software dependencies, which are outdated or misspelled. These could, if installed somewhwere, contain security vulnerabilities."),
    )
    # Only the findings that apply are listed; joining placeholders for the
    # others would leave stray blank lines between the bullets.
    leak_lines = [text for flagged, text in findings if flagged]

    # The survey link is chosen per branch; both values are empty here.
    if leak_lines:
        leaks_text = "\n".join(leak_lines)
        survey_link = ""
    else:
        leaks_text = "(No leaks have been found in your repository)"
        survey_link = ""

    # dedent runs on the template before substitution, so multi-line
    # leaks_text is inserted verbatim.
    return subject, textwrap.dedent('''\
        Hello{name},

        we are a research team at TU Wien, Austria. We are writing you, because you 
        are using GitHub and have a repository with configuration files (dotfiles).
        We did research on the usage and security of these repositories.

        We found the following issues with your repository (if any):

        {leaks_text}

        In order to better understand how and why you use shared configurations, we
        designed a small survey. We would be very happy, if you filled it out. It takes
        about 10-15 minutes.

        {survey_link}

        If you have any additional notes, questions or feedback, you can reply to this email.

        Thank you for your time
        Best regards
        Gerhard Jungwirth (TU Wien)
        ''').format(name=greeting_name, leaks_text=leaks_text, survey_link=survey_link)

# subject, body = prepare_template("gerhard", False, True, True, False)
# print(body)


In [5]:
# **Bookkeeping of already-sent emails**
#
# The `sent` table can be created once from the shell with:
# sqlite3 -table -header results.db 'CREATE TABLE sent (email TEXT PRIMARY KEY ON CONFLICT IGNORE, sent BOOLEAN NOT NULL);'

db = sqlite3.connect(db_path)

# Load the whole table; "sent" is read with pandas' nullable boolean dtype.
sent_df = pd.read_sql_query(
    sql='select email, sent from sent',
    con=db,
    dtype={"sent": "boolean"},
)

sent_df


Unnamed: 0,email,sent


In [6]:
#sent_df = sent_df.append({"email": "foo@bar.com", "sent": True}, ignore_index=True)
#sent_df

In [25]:
# Recipients with their per-category findings; CSV lines starting with '#' are skipped.
testdata = pd.read_csv(
    "/home/gjungwirth/data/02_data/dotfiles-analyse/09_send_survey/tests.csv",
    dtype={
        "is_credentials": "boolean",
        "is_rsa": "boolean",
        "is_privdata": "boolean",
        "is_dependencies": "boolean",
    },
    comment='#')
# Left join on purpose: an outer join would also keep addresses that exist only
# in the `sent` table. Those rows have no username/finding flags (all NA) and
# would break prepare_template() if they were ever selected for sending.
# Recipients without a bookkeeping row get sent = NA.
td2 = testdata.merge(sent_df, how="left", on="email")


def send_df_row(row, pbar):
    """Email the recipient described by one table row and tick the progress bar.

    Args:
        row: Mapping-like row providing ``username``, ``email`` and the four
            ``is_*`` finding flags.
        pbar: Progress bar object; its ``update()`` is called once after sending.

    Returns:
        ``{"email": <recipient address>, "sent": True}``.
    """
    flag_columns = ('is_credentials', 'is_rsa', 'is_privdata', 'is_dependencies')
    subject, body = prepare_template(
        row['username'], *(row[column] for column in flag_columns))
    recipient = row["email"]
    send_email(recipient, subject, body)
    pbar.update()
    return {"email": recipient, "sent": True}


# Select rows explicitly marked unsent plus rows with no bookkeeping entry (NA).
all_unsent = td2[~td2["sent"] | td2["sent"].isna()]

# Send row by row and collect the outcome of every successful send.
# `result` is built in `finally`, so if one send raises, the addresses that
# were already mailed are still available for bookkeeping instead of being
# lost (and mailed a second time on retry). The exception itself propagates.
sent_records = []
pbar = tqdm(total=all_unsent.shape[0])
try:
    for row_label, row in all_unsent.iterrows():
        sent_records.append(send_df_row(row, pbar))
finally:
    pbar.close()
    # Same columns as before: one {"email", "sent"} record per mailed recipient.
    result = pd.DataFrame(sent_records, columns=["email", "sent"])


In [30]:
# Record the addresses mailed in this run. Without merging `result` into
# `sent_df`, only rows that were read from the table are written back, and the
# next run would mail the same recipients again.
# (DataFrame.append no longer exists in pandas 2.x, hence pd.concat.)
newly_sent = result[["email", "sent"]].astype({"sent": "boolean"})
sent_df = pd.concat([sent_df, newly_sent], ignore_index=True)
# keep="last": the fresh record wins over an older one for the same address.
sent_df = sent_df.drop_duplicates(subset=["email"], keep="last")
# NOTE(review): if the table was created with PRIMARY KEY ON CONFLICT IGNORE,
# appending only adds new addresses; an existing sent=False row is not flipped
# to True by this write — confirm whether an UPDATE is needed for that case.
sent_df.to_sql("sent", con=db, if_exists="append", index=False)


In [5]:
db = sqlite3.connect(db_path)

# One row per repository: id, owner login and owner email.
df = pd.read_sql(
    sql='select r.repo_id, r.owner_login, r.owner_email from repo r',
    con=db,
)

# Headerless files; column 0 is matched against repo_id below
# (presumably one repository id per line — verify against the files).
secret_df = pd.read_csv(
    "/home/gjungwirth/data/02_data/03_analyses/statistics_for_paper/venn_data/secret_repos.txt",
    header=None,
)
rsa_df = pd.read_csv(
    "/home/gjungwirth/data/02_data/03_analyses/statistics_for_paper/venn_data/rsapriv_repos.txt",
    header=None,
)
privdata_df = pd.read_csv(
    "/home/gjungwirth/data/02_data/03_analyses/statistics_for_paper/venn_data/privdata_repos.txt",
    header=None,
)
dependency_df = pd.read_csv(
    "/home/gjungwirth/data/02_data/03_analyses/statistics_for_paper/venn_data/dependency_repos.txt",
    header=None,
)

# Add one boolean column per finding category: True where the repository id
# appears in the corresponding list.
df = df.assign(
    is_credentials=lambda frame: frame["repo_id"].isin(secret_df[0]),
    is_rsa=lambda frame: frame["repo_id"].isin(rsa_df[0]),
    is_privdata=lambda frame: frame["repo_id"].isin(privdata_df[0]),
    is_dependencies=lambda frame: frame["repo_id"].isin(dependency_df[0]),
)


In [6]:
# Summary statistics over all columns, non-numeric ones included (include="all").
df.describe(include="all")

Unnamed: 0,repo_id,owner_login,owner_email,is_credentials,is_rsa,is_privdata,is_dependencies
count,125171.0,125171,125171.0,125171,125171,125171,125171
unique,,117779,44476.0,2,2,2,2
top,,dfmgr,,False,False,False,False
freq,,67,77512.0,114100,123682,124754,123578
mean,100236100.0,,,,,,
std,86717260.0,,,,,,
min,2316.0,,,,,,
25%,24890540.0,,,,,,
50%,75655950.0,,,,,,
75%,161111700.0,,,,,,
