In [1]:
#| default_exp pymatch

# PyMatch

> Suggesting people to connect with based on shared interests

**pyMatch is a database of users and their interests. Given a user from its database, it will search and return others with matching/mutual interests**

### What data will be stored?

*Note: This project is using a serverless NoSQL database called Base by deta.sh. It's free, has unlimited storage and is easy to use!*

- Dates are UTC in the format `dd-mm-yyyy hh:00`, in code this is %d-%m-%Y %H:00 (see [Python strftime cheatsheet](https://strftime.org/) for more formats).
- `user_id` is a unique identifier for each user.
- [ULIDs (Universally Unique Lexicographically Sortable Identifiers)](https://github.com/ahawker/ulid) are used as keys, and order is preserved since data in Bases is ordered by key; `key` is required by Deta and needs to be unique.
- Interests are a list of strings.
<br>


**Users**

Info stored for each user

```
[{'date': '02-10-2022 17:00',
  'key': '01BJQMF54D093DXEAWZ6JYRPAQ',
  'user id': '1111',
  'name': 'cameronson the 69th',
  'interests': ['MMA', 'memes', 'Uk', 'tea']}]
```

In [2]:
#|export

import os
import time
from datetime import datetime, timezone
from typing import Optional

import pandas as pd
import ulid
from deta import Deta

In [3]:
# Record the installed versions of the third-party packages used by this notebook.
# `grep` matches substrings, so `grep pandas` also lists packages such as geopandas.
!pip freeze | grep pandas
!pip freeze | grep deta
!pip freeze | grep ulid

geopandas==0.11.1
pandas==1.2.5
deta==1.1.0
ulid-py==1.1.0


A Deta project key is needed

In [4]:
#|export
#|eval: false

def deta_init(project_key: str = "PROJECT_KEY" # the name of the environment variable holding the project key
              ):
    """
    Initialize Deta with a project key read from the environment.

    `project_key` is the *name* of the environment variable, not the key itself,
    so the secret never has to appear in the notebook.

    Returns the `Deta` client built from that key.
    Raises `KeyError` if the environment variable is not set.
    """

    try:
        secret = os.environ[project_key]
    except KeyError:
        # Same exception type as a plain lookup, but with an actionable message.
        raise KeyError(
            f"environment variable {project_key!r} is not set; it must hold the Deta project key"
        ) from None

    return Deta(secret)

## Users

### Add

In [5]:
#|export

def add_someone(name: Optional[str],
                username: str, 
                user_id: str, # unique identifier to authenticate users
                interests: list[str], 
                database_name: str # create or connect to an existing database
                ):
    """
    Add a new user and return the stored item.

    The entry is keyed by a freshly generated ULID and stamped with the current
    UTC time, truncated to the hour (`%d-%m-%Y %H:00`).

    NOTE(review): `username` is accepted but not stored; only `name`, `user_id`
    and `interests` are written. Confirm whether it should be persisted.
    NOTE(review): relies on a module-level `deta` client (e.g. the value returned
    by `deta_init`) that is not bound in the visible cells of this notebook.
    """

    db = deta.Base(database_name)

    # Dates are documented as UTC, so take the time in UTC rather than local time.
    created = datetime.now(timezone.utc).strftime("%d-%m-%Y %H:00")

    user = db.put(
        {
            'key': ulid.new().str,
            "date": created,
            "user id": user_id,
            'name': name,
            'interests': interests
            }
    )

    return user

### Find

In [6]:
#|export

def find_by_userid(user_id: str,
                   database_name: str = 'users' # database to search
                   ):
    """
    Find a user from their user_id.

    Returns the list of matching items, or prints a message and returns `None`
    when no entry has that user id.

    NOTE(review): relies on a module-level `deta` client (e.g. the value returned
    by `deta_init`) that is not bound in the visible cells of this notebook.
    """

    db = deta.Base(database_name)

    user = db.fetch({"user id": user_id}).items

    if user:
        return user

    print("user doesn't exist")
    return None

### Delete

In [7]:
#|export

def delete_user(user_id: str,
                database_name: str = 'users' # database to delete from
                ):
    """
    Delete the entries stored for a user_id, if any exist.

    Every item returned for the user id is deleted (by its `key`), so duplicate
    entries do not survive a delete. Prints whether anything was deleted.

    NOTE(review): relies on a module-level `deta` client (e.g. the value returned
    by `deta_init`) that is not bound in the visible cells of this notebook.
    """

    db = deta.Base(database_name)

    entries = db.fetch({"user id": user_id}).items

    if not entries:
        print(f"user {user_id} not in {database_name}")
        return

    # Deta deletes by key, so each matching entry is removed individually.
    for entry in entries:
        db.delete(entry["key"])

    print(f"user {user_id} deleted from {database_name}")

## Match

In [8]:
#|export

def match_interests(user_id: str, database_name: str):
    """
    Match users to a given user_id and return names and common/shared interests.

    Returns a list of `{'name': ..., 'common interests': set(...)}` dicts, one per
    other user sharing at least one interest. Returns an empty list when the
    user is not found or has no interests.

    NOTE(review): relies on a module-level `deta` client (e.g. the value returned
    by `deta_init`) that is not bound in the visible cells of this notebook.
    """

    db = deta.Base(database_name)

    # Items are keyed by ULID, so the user has to be looked up by querying
    # the "user id" field rather than by key.
    found = db.fetch({"user id": user_id}).items
    if not found:
        return []

    interests = found[0].get('interests') or []
    if not interests:
        # Without interests there is nothing to match on (and the query would be empty).
        return []

    # A list of queries is combined with OR: match anyone sharing any interest.
    query = [{'interests?contains': interest} for interest in interests]

    res = db.fetch(query)
    candidates = list(res.items)

    # Keep fetching while the response reports a `last` key, as `fetch_all` does.
    while res.last:
        res = db.fetch(query, last=res.last)
        candidates.extend(res.items)

    wanted = set(interests)

    matches = []
    for item in candidates:
        # The user trivially matches their own interests; leave them out.
        if item.get("user id") == user_id:
            continue
        matches.append(
            {
                'name': item['name'],
                'common interests': wanted & set(item['interests'])
            }
        )

    return matches

## Stats

### Get whole database

In [9]:
#|export

def database_exists(database_name: str):
    """
    Check that a database exists by asking for a single item.

    Returns `True` when at least one item comes back; raises `NameError`
    otherwise.

    NOTE(review): relies on a module-level `deta` client that is not bound in
    the visible cells of this notebook.
    """

    first_page = deta.Base(database_name).fetch(limit=1)

    # Guard clause: no items is treated as "no such database".
    if not first_page.items:
        raise NameError(f"{database_name} doesn't exist")

    return True

In [10]:
#|export

def fetch_all(database_name: str):
    """
    Fetch every item in the database and return them as one list.

    Follows the paging pattern from deta's docs:
    https://docs.deta.sh/docs/base/sdk/#fetch-all-items-1

    Calls `database_exists` first, so a `NameError` is raised when the
    database has no items.

    NOTE(review): relies on a module-level `deta` client that is not bound in
    the visible cells of this notebook.
    """

    database_exists(database_name) # raises if the db doesn't exist

    base = deta.Base(database_name)

    page = base.fetch()
    collected = page.items

    # Each response carries `last`; keep requesting pages until it is falsy.
    while page.last:
        page = base.fetch(last=page.last)
        collected.extend(page.items)

    return collected

In [11]:
#|export

def database_to_dataframe(database_name: str):
    """
    Fetch the whole database and convert it to a pandas dataframe.

    Uses `fetch_all`, so each stored item becomes one row and each field one
    column. Any error raised by `fetch_all` propagates.
    """

    records = fetch_all(database_name=database_name)

    # `fetch_all` returns a list of items, which the DataFrame constructor takes directly;
    # pandas is already imported at the top of the notebook.
    return pd.DataFrame(records)

### Count interests

In [12]:
#|export

def count_interests(database_name: str ='users'):
    """
    Shows each interest and how many times it occurs. If needed, this can work for any column that contains a list of strings.

    Returns a list of `{'interests': <interest>, 'count': <int>}` dicts in the
    order given by `value_counts` (same column names as
    `interestcount_to_dataframe`).

    Uses `database_to_dataframe`
    """

    # explode() turns each list entry into its own row so value_counts() can tally them.
    tallies = database_to_dataframe(database_name)['interests'].explode().value_counts()

    # Iterating the Series directly yields only the counts; .items() gives (interest, count) pairs.
    return [
        {'interests': interest, 'count': int(occurrences)}
        for interest, occurrences in tallies.items()
    ]

In [13]:
#|export

def interestcount_to_dataframe(database_name: str ='users'):
    """
    Return the interest counts as a pandas dataframe with the columns
    `interests` and `count`.

    Uses `database_to_dataframe`
    """

    # background on value_counts: https://re-thought.com/pandas-value_counts/

    users_frame = database_to_dataframe(database_name)
    tallies = users_frame['interests'].explode().value_counts()

    # Name the index and the values while moving the index into a regular column.
    return tallies.rename_axis('interests').reset_index(name='count')

### User stats

In [14]:
#|export

def total_users(database_name: str):
    """
    Return the number of items stored in the database.

    Uses `fetch_all`, so any error it raises propagates.
    """

    everyone = fetch_all(database_name)
    return len(everyone)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=18629d2c-e68e-4d0c-88d0-308008f53e1f' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>