In [2]:
!pip install python-dotenv
!git init
!touch .gitignore
!echo "info.env" >> .gitignore
!git check-ignore -v .env


Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/


In [3]:
import pandas as pd
import http.client
import json
import requests
import os
from dotenv import load_dotenv
import re

# Load environment variables
load_dotenv("info.env")

# Access credentials
authUsername = os.getenv("API_USERNAME")
authPassword = os.getenv("API_PASSWORD")

### Pre-filtering based on inventory ###
# Search based on specific filters
page = 8
page_size = 20
craft = 'crochet'
knit_gauge = 5
weight = 'DK'
query = 'dragon'

# Define URL for the API request
url = 'https://api.ravelry.com/patterns/search.json?page={}&page_size={}&craft={}&query={}'.format(page, page_size, craft, query)
# Make request
r = requests.get(url, auth=requests.auth.HTTPBasicAuth(authUsername, authPassword))
# Close connection
r.close()

def getPatterns():
  yarndf = pd.DataFrame()
  keydf = pd.DataFrame()
  if r.status_code == 200:
      data = r.json()
      # Extract pattern ID(s) from the search results
      if 'patterns' in data and len(data['patterns']) > 0:
        for i in range(len(data['patterns'])):
            pattern_id = data['patterns'][i]['id']

            # Define URL to get pattern details
            pattern_url = f'https://api.ravelry.com/patterns/{pattern_id}.json'

            # Make the request for pattern details
            pattern_response = requests.get(pattern_url, auth=requests.auth.HTTPBasicAuth(authUsername, authPassword))
            pattern_response.close()

            # If patterns found, collect the necessary yarn info
            if pattern_response.status_code == 200:
                pattern_data = pattern_response.json()
                try:
                  #print(json.dumps(json.loads(pattern_response.text), indent=4))
                  yarnData = pattern_data['pattern']['yarn_weight']

                  # Extract yardage for minimum/maximums
                  yardage = ['0', '0']
                  extracted = re.findall(r'\d+', pattern_data['pattern']['yardage_description'])

                  if len(extracted) == 2:
                    yardage = extracted
                  elif len(extracted) == 1:
                    yardage[0] = extracted[0]
                    yardage[1] = extracted[0]

                  # Get project name and add all to dataframe
                  yarndf = pd.concat([yarndf, pd.DataFrame([{'Project name': pattern_data['pattern']['name'],
                                                            'Yarn id': yarnData['id'],
                                                            'Crochet gauge': yarnData['crochet_gauge'],
                                                            'Knit gauge': yarnData['knit_gauge'],
                                                            'Yarn name': yarnData['name'],
                                                            'ply': yarnData['ply'], 'wpi': yarnData['wpi'],
                                                            'Min yardage': int(yardage[0]),
                                                            'Max yardage': int(yardage[1]),
                                                            'Notes' : pattern_data['pattern']['notes']}])], ignore_index=True)

                  # Get key information from pattern details for recommender
                  keydf = pd.concat([keydf, pd.DataFrame([{'Project name': pattern_data['pattern']['name'],
                                                          'Project id': pattern_data['pattern']['id'],
                                                          'Difficulty average': pattern_data['pattern']['difficulty_average'],
                                                          'UK': pattern_data['pattern']['has_uk_terminology'],
                                                          'US' : pattern_data['pattern']['has_us_terminology']}])], ignore_index=True)
                except:
                  print("No yarn weight recorded")
      else:
        print("No patterns found")
  else:
    print("Unable to access patterns")

  keydf = keydf.merge(yarndf, on='Project name')
  print(keydf)
  return keydf


def fetchInventory():
  # Open stored inventory into dataframe
  with open("inventory.json", "r") as file:
    inventory = json.load(file)
    inv = pd.DataFrame(inventory)
    groupedInv = inv.groupby(['Yarn name','ply', 'wpi', 'Total yardage']).agg(
        min_yardage=('Total yardage', 'min'),
        max_yardage=('Total yardage', 'max')).reset_index()

    print(groupedInv)
    return groupedInv

def filterPatterns(groupedInv, patterns):
  # Filters patterns for yarn available in inventory
  filtereddf = patterns[patterns['Yarn name'].isin(groupedInv['Yarn name'])]
  finaldf = pd.DataFrame(columns = patterns.columns)
  for idx, row in filtereddf.iterrows():
    rowYardage = row['Max yardage']
    maxYardage = int(max(groupedInv.loc[groupedInv['Yarn name'] == row['Yarn name'], 'max_yardage'].values))

    if (rowYardage > 0) & (rowYardage <= maxYardage):
      # Directly adds if empty for future compatibility
      if finaldf.empty:
        finaldf = pd.DataFrame([row])
      else:
        finaldf = pd.concat([finaldf, pd.DataFrame([row])], ignore_index = True)

  print(finaldf)

patterns = getPatterns()


No yarn weight recorded
No yarn weight recorded
No yarn weight recorded
No yarn weight recorded
No yarn weight recorded
                   Project name  Project id  Difficulty average     UK    US  \
0          Mini Dragon Snuggler     7312744            2.333333  False  True   
1   Hopscotch Dragon Tail Scarf     7382970            0.000000  False  True   
2          Dragon Fin Shawlette      971846            1.964286   None  True   
3    Dragon Tale Cowl (Crochet)     7319858            0.000000   None  True   
4         Mini Dragon Amigurumi      669311            4.000000   None  True   
5             Davina the Dragon     1299464            3.750000   None  True   
6       Elvira the Fairy Dragon     7363976            3.333333   None  True   
7                        dragon     1316333            4.250000   None  True   
8              Baby Luck Dragon     1142760            3.923077   None  True   
9          Chinese Paper Dragon     7311316            0.000000  False  True   


In [4]:
# Text pre-processing before recommender
chars = r'[^\w\s]' # Code for removing special characters
patterns['Notes'] = patterns['Notes'].replace(chars, '', regex=True)
patterns['Project name'] = patterns['Project name'].replace(chars, '', regex=True)

# Get current yarn inventory and filter through patterns
groupedInv = fetchInventory()
filterPatterns(groupedInv, patterns)

   Yarn name  ply  wpi  Total yardage  min_yardage  max_yardage
0         DK    8   11            500          500          500
1         DK    8   12            200          200          200
2  Fingering    4   14            800          800          800
           Project name  Project id  Difficulty average     UK    US  Yarn id  \
0  Dragon Fin Shawlette      971846            1.964286   None  True       11   
1                dragon     1316333            4.250000   None  True       11   
2  Chinese Paper Dragon     7311316            0.000000  False  True       11   

  Crochet gauge Knit gauge Yarn name ply wpi  Min yardage  Max yardage  \
0          None         22        DK   8  11          230          350   
1          None         22        DK   8  11          100          150   
2          None         22        DK   8  11           40           60   

                                               Notes  
0  A unique asymmetrical shawlette that can be st...  
1  Dragon cr

In [36]:
### Basic recommender ###
# Take the last 5 patterns saved by the user and use the average calculated metrics to then recommend
# In release, should use the saved and completed patterns
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.metrics.pairwise import cosine_similarity


def readSaved():
  with open("saved.json", "r") as file:
    savedPatterns = json.load(file)
    saved = pd.DataFrame(savedPatterns)
    return saved

def vectorize(names, bow, vectorizer):
  tfidfTransformer = TfidfTransformer()
  tfidfMatrix = tfidfTransformer.fit_transform(bow)
  columnNames = vectorizer.get_feature_names_out() # Gets words for column naming

  return pd.DataFrame(tfidfMatrix.toarray())

def transformPatterns(saved, patterns):
  savedNames = saved['Project name'].tolist()
  patternNames = patterns['Project name'].tolist()
  features = ['Project name', 'Difficulty average', 'US', 'UK', "Yarn id"]

  savedCombined = saved.copy()
  patternsCombined = patterns.copy()

  def combineFeatures(data):
    return data['Project name'] + ' ' + data['Notes'] + ' ' + str(data['Difficulty average']) + ' ' + str(data['US']) + ' ' + str(data['UK'])


  for feature in features:
    savedCombined[feature] = savedCombined[feature].fillna('')
    patternsCombined[feature] = patternsCombined[feature].fillna('')

  savedCombined['combinedFeatures'] = savedCombined.apply(combineFeatures, axis=1)
  patternsCombined['combinedFeatures'] = patternsCombined.apply(combineFeatures, axis=1)
  allCombined = pd.concat([savedCombined["combinedFeatures"], patternsCombined["combinedFeatures"]])


  vectorizer = CountVectorizer() #Removes stop words to reduce unnecessary processing
  bagOfWordsMatrix = vectorizer.fit_transform(allCombined)

  savedMatrix = vectorize(savedNames, bagOfWordsMatrix[:len(saved)], vectorizer)
  patternMatrix = vectorize(patternNames, bagOfWordsMatrix[len(saved):], vectorizer)
  #bagOfWordsMatrix = vectorizer.fit_transform(savedCombined["combinedFeatures"])
  #one = vectorize(savedNames,bagOfWordsMatrix, vectorizer)
  #bagOfWordsMatrix = vectorizer.transform(patternsCombined["combinedFeatures"]) # Maps to the saved pattern words
  #two = vectorize(patternNames, bagOfWordsMatrix, vectorizer)

  similar = cosine_similarity(patternMatrix, savedMatrix)
  print(pd.DataFrame(similar, index = patternNames, columns = savedNames))

saved = readSaved()
transformPatterns(saved, patterns)


                             Dante the Dragon  Cute Critter Dragon  \
Mini Dragon Snuggler                 0.067184             0.142313   
Hopscotch Dragon Tail Scarf          0.065230             0.176943   
Dragon Fin Shawlette                 0.074607             0.162510   
Dragon Tale Cowl Crochet             0.144197             0.111428   
Mini Dragon Amigurumi                0.075926             0.164769   
Davina the Dragon                    0.091615             0.034256   
Elvira the Fairy Dragon              0.057640             0.124782   
dragon                               0.062721             0.129553   
Baby Luck Dragon                     0.069093             0.345982   
Chinese Paper Dragon                 0.124090             0.160413   
Skyler the Pocket Dragon             0.192045             0.147308   
Nori the Sea Dragon                  0.099671             0.087722   
Dragon Scale Scarf                   0.047746             0.079831   
Drake the Dragon    

Notes for retrieving names for the recommender
*   Remove any symbols from pattern names cause having " causes issues with the json
*   Any missing data should be 0 or none
* Probably include links to the projects when saving to the json


