## Setup logging

In [4]:
import logging
from ipylogging import DisplayHandler, HTMLFormatter


handler = DisplayHandler()
handler.setFormatter(HTMLFormatter())


log = logging.getLogger()
log.addHandler(handler)
log.setLevel(logging.INFO)

ModuleNotFoundError: No module named 'ipylogging'

## Global Variables

In [5]:
from pathlib import Path
from minirig import GHRequests, load_csv_dataset, save_csv_dataset

data_dir = Path('../data')
cache_dir_github = data_dir.joinpath('github')
project_list_path = data_dir.joinpath('project-list.csv')
email_address_dataset_path = data_dir.joinpath('email-addresses.csv')
collaborators_dir = data_dir.joinpath('project-collaborators')
github_token = open('../gh-token.txt','r').readlines()[0].strip()

## Collect email addresses

In [7]:
gh_api = GHRequests(token=github_token,cache_dir=cache_dir_github)
project_list = load_csv_dataset(project_list_path)

raw_data = []

for project in project_list:
    (owner, repo) = (project['owner'], project['project'])
    contributors = gh_api.get_repository_contributors(owner=owner, repo=repo)
    print(f'{owner} | {repo}')
    for c in contributors:
        id = 0
        try:
            user = gh_api.get_user_information(user=c['login'])
        except:
            continue
        if user['email'] != None: 
            login = c['login']
            name = user['name'] if user['name'] != None else '-'
            contributions = c['contributions']
            site_admin = c['site_admin']
            email = user['email']
            row = {'id' = id,
                   'owner': owner.capitalize(), 
                   'project': repo.capitalize(), 
                   'login': login,
                   'full-name': name, 
                   'first-name': name.split(' ')[0],
                   'last-name': name.split(' ')[-1],
                   'email': email, 
                   'n-contributions': contributions, 
                   'is_admin': site_admin}
            raw_data.append(row)
    headers = ['owner', 'project', 'login','full-name','first-name','last-name','email', 'n-contributions','is_admin']
    save_csv_dataset(filename=email_address_dataset_path, data=raw_data, header=headers)

apache | hbase
apache | hadoop
apache | spark
apache | impala
apache | geode
apache | flink
apache | hive
apache | arrow
apache | cordova
apache | lucene
apache | pylucene
apache | dubbo
apache | solr
apache | beam
apache | camel
apache | subversion
apache | netbeans
apache | cocoon
apache | myfaces
apache | tobago
apache | pulsar
apache | ignite
apache | cassandra
apache | couchdb
apache | maven
apache | ozone
apache | druid
apache | activemq
apache | drill
apache | struts
apache | airflow
apache | doris
apache | kafka
apache | openwhisk
apache | mesos
apache | sling
apache | jdo
apache | tomcat


## Remove duplicated emails

In [11]:
import pandas as pd
emails_df = pd.read_csv(email_address_dataset_path)
unique_email_df = emails_df.drop_duplicates(subset=['email'], keep='last')
unique_email_df['email-sent'] = 'no'
unique_email_df['email-date'] = None
unique_email_df['reminder-sent'] = 'no'
unique_email_df['reminder-date'] = None
unique_email_df.to_csv(data_dir.joinpath('contacts.csv'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unique_email_df['email-sent'] = 'no'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unique_email_df['email-date'] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unique_email_df['reminder-sent'] = 'no'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index

In [9]:
unique_email_df

Unnamed: 0,owner,project,login,full-name,first-name,last-name,email,n-contributions,is_admin,email-sent,email-date,reminder-sent,reminder-date
0,Apache,Hbase,Apache9,Duo Zhang,Duo,Zhang,palomino219@gmail.com,1273,False,no,,no,
1,Apache,Hbase,apurtell,Andrew Purtell,Andrew,Purtell,apurtell@apache.org,964,False,no,,no,
3,Apache,Hbase,enis,Enis Soztutar,Enis,Soztutar,enis@apache.org,428,False,no,,no,
4,Apache,Hbase,jmhsieh,Jonathan Hsieh,Jonathan,Hsieh,jmhsieh@gmail.com,360,False,no,,no,
5,Apache,Hbase,ndimiduk,Nick Dimiduk,Nick,Dimiduk,ndimiduk@gmail.com,281,False,no,,no,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1381,Apache,Tomcat,shirayuki,Yuki Shira,Yuki,Shira,shirayuking@gmail.com,1,False,no,,no,
1382,Apache,Tomcat,alexkachanov,Alex Kachanov,Alex,Kachanov,alexkachanov@yandex.ru,1,False,no,,no,
1383,Apache,Tomcat,iamjooon2,-,-,-,iamjooon2@gmail.com,1,False,no,,no,
1384,Apache,Tomcat,jiangtao69039,jiangtao,jiangtao,jiangtao,jiangtao69039@gmail.com,1,False,no,,no,


## Send e-mails

In [6]:
!pip install fire



In [8]:
import time
import fire
import sys
from IPython.display import clear_output

sys.path.insert(1,'/email-sender')

from email_sender.mail import EmailBuilder, EmailSender

contacts_file = "../data/teste-choma.csv" 
email_template = "email_sender/data/email-template.txt"
login_secret_file = "email_sender/data/login.secret" 
sender_name = "Joao Choma" 
subject = "Teste"

#Using a interval of 1 crashes the script. Using 3 is fine.
def send_emails(contacts_file, email_template, login_secret_file, sender_name, subject, interval=3, dry_run=False,
                email_server_smtp_address="smtp.gmail.com", email_server_smtp_port=587):
    """
    High-level function to send the emails.
    :param contacts_file: the file containing the contacts to send the emails to.
    :param email_template: the file containing the template string of the email body.
    :param login_secret_file: the file containing your credentials to login to the SMTP server.
    :param sender_name: the name of the sender to be displayed to the receiver.
    :param subject: the subject of the email as template string.
    :param interval: the time to wait between each email sent. This helps to avoid spamming! Set to 0 to not wait.
    :param dry_run: IMPORTANT! This parameter prevents from sending the emails involuntarily.
    Set to true (default) to run a dry-run of the script and ensure you set everything correctly.
    Set to false when you are ready to send the emails for real! Interval is ignored if this is set to true.
    :param email_server_smtp_address the server's SMTP address. Default is Google's one
    :param email_server_smtp_port the server's SMTP port. Default is Google's one
    """
    email_builder = EmailBuilder(contacts_file, email_template)
    emails = email_builder.create_emails(sender_name, subject)
    sender = EmailSender(host=email_server_smtp_address, smtp_port=email_server_smtp_port)
    sender.login_with_secret(login_secret_file)

    if dry_run:
        print(f"You are about to send {len(emails)} from your '{sender_name}' address with subject '{subject}'.")
        reply = input("Do you wish to proceed? [yes/no] ")
        if reply != "yes":
            print("No emails will be sent.")
            exit(0)

    for i, email in enumerate(emails):
        clear_output(wait=True)
        print(f"Sending email {i+1} from {email.sender} to {email.to} with subject '{email.subject}'")
        if not dry_run:
            sender.send_email(email)
        if not dry_run:
            print(f"Waiting {interval} second(s)...")
            time.sleep(interval)
    print("Completed sending emails.")
    sender.close()


def test_email():
    """
    An example on how to use the APIs to send the emails.
    """
    email_template = 'data/email-template.txt'
    contacts_file = 'data/contacts.csv'
    email_builder = EmailBuilder(contacts_file, email_template)
    test_email = email_builder.create_emails("d.d.sas@rug.nl", "Test Email $project")[1]

    sender = EmailSender(host="smtp.gmail.com", smtp_port=587)
    sender.login_with_secret("data/login.secret")
    sender.send_email(test_email)
    print("Email sent")
    sender.close()


send_emails(contacts_file, email_template,login_secret_file,sender_name, subject)

Sending email 1 from Joao Choma to comercial@transmoviterra.com.br with subject 'Teste'
Waiting 3 second(s)...
Completed sending emails.
