In [2]:
import pymongo
import structure
import pandas as pd
from matplotlib import pyplot as plt
import datetime
import pytz
import shelve
import copy
import os
from dateutil.parser import parse as dateParser
# Connection string of the MongoDB server queried by this notebook.
MONGO_URL = "mongodb://127.0.0.1:27017"
# Module-wide handle to the "mindspore" database; the classes below read
# their collections (commits, issues, pulls, all_pulls) through it.
# NOTE(review): per the PyMongo docs, connect=False postpones connecting
# until the first operation — confirm this is still the intent.
db = pymongo.MongoClient(
    MONGO_URL,
    connect=False,
    maxPoolSize=200,
)["mindspore"]


class ContributionType:
    """Three boolean flags recording the kinds of contribution a user made.

    All flags start as ``False``; callers switch ``code``, ``issue`` and
    ``pr`` to ``True`` individually.
    """

    def __init__(self):
        # Nothing is known about the user yet, so every flag starts cleared.
        self.code = self.issue = self.pr = False

    def __str__(self):
        """Return a three-line, human-readable summary of the flags."""
        return (
            f"code contribution: {self.code}\n"
            f"issue contribution: {self.issue}\n"
            f"pr contribution: {self.pr}"
        )


class User:
    """Activity profile of one contributor, persisted in a local shelve cache.

    On construction the profile is restored from ``cache/user`` when an entry
    for ``login`` exists; otherwise it starts empty. ``update_info`` then
    refreshes it from the MongoDB collections reachable through the
    module-level ``db`` handle and writes it back to the cache.

    Instance attributes:
        username: display name; initialised to ``''`` and only ever replaced
            by a cached value in this class.
        login: account login used in every database filter and as cache key.
        contribution_type: ``ContributionType`` flags for this user.
        PR: list of ``structure.PR`` objects collected so far.
        commits: commits gathered from those PRs (objects with ``timestamp``).
        updateTimestamp: aware datetime of the last refresh, or ``None``.
    """

    # Location of the shelve cache, relative to the working directory.
    # NOTE: shelve stores pickles; only open cache files written by this code.
    _CACHE_DIR = 'cache'
    _CACHE_PATH = 'cache/user'
    # Attributes copied over from a cached instance.
    _CACHED_FIELDS = ('username', 'login', 'contribution_type', 'PR',
                      'commits', 'updateTimestamp')

    def __init__(self, login):
        """Create the profile for ``login``, restoring cached state if present.

        Args:
            login: account login of the contributor.
        """
        self.username = ''
        self.login = login
        self.contribution_type = ContributionType()
        self.PR = []
        self.commits = []
        self.updateTimestamp = None
        print(f'finding user "{self.login}" in cache...')
        try:
            os.makedirs(self._CACHE_DIR, exist_ok=True)
            with shelve.open(self._CACHE_PATH) as shelf:
                cached = shelf[login]
            # Read every field before assigning any, so a cache entry written
            # by an older class layout cannot leave this instance half-restored.
            restored = {name: getattr(cached, name) for name in self._CACHED_FIELDS}
        except KeyError:
            print('user not found, reconstructing...')
        except Exception as exc:
            # Still best-effort (an unreadable cache must not block a rebuild),
            # but report the real cause and let KeyboardInterrupt/SystemExit
            # propagate instead of swallowing them with a bare ``except``.
            print(f'could not read cache ({exc!r}), reconstructing...')
        else:
            for name, value in restored.items():
                setattr(self, name, value)
            print(f'user found, last update time: {self.updateTimestamp}, checking update...')

    @staticmethod
    def _now():
        """Return the current time in Asia/Shanghai, truncated to seconds.

        The time is requested directly in the target zone. Labelling the
        machine's naive local time as UTC and then converting is only correct
        when the machine itself runs in UTC.
        """
        return datetime.datetime.now(pytz.timezone('Asia/Shanghai')).replace(microsecond=0)

    def _pr_query(self):
        """Build the MongoDB filter selecting this user's not-yet-seen PRs."""
        query = {"user.login": self.login}
        if self.updateTimestamp is not None:
            # ``created_at`` is compared as a string, so the bound must use the
            # same layout as the stored values. isoformat() yields the
            # 'YYYY-MM-DDTHH:MM:SS+08:00' form; str() would put a space where
            # the 'T' is, which sorts before every stored value of the same day.
            # NOTE(review): assumes created_at is stored as ISO-8601 with a
            # +08:00 offset — confirm against the collection.
            query["created_at"] = {'$gt': self.updateTimestamp.isoformat()}
        return query

    def update_info(self):
        """Refresh flags, PRs and commit statistics, then persist to the cache."""
        self.get_contribution_type()
        self.get_pr()
        self.get_commit_frequency()
        self.updateTimestamp = self._now()
        print('writing to cache...')
        os.makedirs(self._CACHE_DIR, exist_ok=True)
        with shelve.open(self._CACHE_PATH) as shelf:
            shelf[self.login] = self

    def get_contribution_type(self):
        """Set each contribution flag whose collection has a matching document.

        Flags are only ever raised here, never cleared.
        """
        # NOTE(review): this reads ``db.pulls`` while get_pr reads
        # ``db.all_pulls`` — confirm the two collections are both intended.
        probes = (
            ('code', db.commits, "author.login"),
            ('issue', db.issues, "user.login"),
            ('pr', db.pulls, "user.login"),
        )
        for flag, collection, field in probes:
            # One projected document is enough to prove existence; there is no
            # need to download every match just to count it.
            if collection.find_one({field: self.login}, {"_id": 1}) is not None:
                setattr(self.contribution_type, flag, True)

    def get_pr(self):
        """Fetch PRs created since the last refresh and collect their commits."""
        prs = list(db.all_pulls.find(self._pr_query(),
                                     sort=[("created_at", pymongo.DESCENDING)]))
        print(f'{len(prs)} new prs')
        for pr in prs:
            PR = structure.PR(pr)
            self.PR.append(PR)
            self.commits.extend(PR.commits)

    def get_commit_frequency(self):
        """Sort ``self.commits`` by time and print commit counts per 30 days.

        Each commit is assigned to a bucket by whole 30-day periods elapsed
        since the earliest commit. Nothing is printed beyond the progress line
        when there are no commits.
        """
        print('calculating freq...')
        if not self.commits:
            return
        self.commits = sorted(self.commits, key=lambda commit: commit.timestamp)
        start = self.commits[0].timestamp
        months = [(commit.timestamp - start).days // 30 for commit in self.commits]
        df = pd.DataFrame({"Month": months})
        print(f"Total\t    {len(df)}")
        # Series.value_counts replaces the deprecated top-level pd.value_counts.
        print(df['Month'].value_counts(sort=False))

    @staticmethod
    def get_all_contributor():
        """Collect the logins of all PR authors and export them to CSV.

        Writes ``data/contributor.csv`` (one ``contributors`` column, sorted)
        and returns the logins.

        Returns:
            set of login strings found in ``db.all_pulls``.
        """
        # Only the login is needed, so project it instead of loading full PRs.
        contributors = {pr['user']['login']
                        for pr in db.all_pulls.find({}, {"user.login": 1})}
        # A set has no defined order; sort so the exported file is reproducible.
        data = pd.DataFrame({'contributors': sorted(contributors)})
        os.makedirs('data', exist_ok=True)
        data.to_csv('data/contributor.csv', encoding='utf8')
        return contributors


if __name__ == '__main__':
    # Load (or create) the profile of a single contributor, then refresh it
    # from the database and write it back to the cache.
    user = User(login="jachua")
    user.update_info()
    # Optional: export every PR author's login to CSV.
    # User.get_all_contributor()


finding user "jachua" in cache...
user found, last update time: 2021-03-03 16:32:02+08:00, checking update...
0 new prs
calculating freq...
Total	    13
0    1
1    3
2    9
Name: Month, dtype: int64
writing to cache...


In [28]:
from dateutil.parser import parse as dateParser
import pytz
import time
import datetime

# Current time expressed in Asia/Shanghai, truncated to whole seconds.
# Asking for "now" directly in the target zone avoids labelling the machine's
# naive local time as UTC, which is only right when the machine runs in UTC.
b = datetime.datetime.now(pytz.timezone('Asia/Shanghai')).replace(microsecond=0)
# An ISO-8601 string with an explicit offset, parsed for comparison with b.
a = dateParser("2021-03-02T14:27:21+08:00")
print(a)
print(b)

2021-03-02 14:27:21+08:00
2021-03-03 16:15:48+08:00
