In [2]:
!pip install python-dotenv
!git init
!touch .gitignore
!echo "info.env" >> .gitignore
!git check-ignore -v .env


Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/


In [100]:
import pandas as pd
import http.client
import json
import requests
import os
from dotenv import load_dotenv
import re

# Load environment variables
load_dotenv("info.env")

# Access credentials
authUsername = os.getenv("API_USERNAME")
authPassword = os.getenv("API_PASSWORD")

### Pre-filtering based on inventory ###
# Search based on specific filters
page = 8
page_size = 50
craft = 'crochet'
knit_gauge = 5
weight = 'DK'
query = 'dragon'

# Define URL for the API request
url = 'https://api.ravelry.com/patterns/search.json?page={}&page_size={}&craft={}'.format(page, page_size, craft)
# Make request
r = requests.get(url, auth=requests.auth.HTTPBasicAuth(authUsername, authPassword))
# Close connection
r.close()

def getPatterns():
  yarndf = pd.DataFrame()
  keydf = pd.DataFrame()
  if r.status_code == 200:
      data = r.json()
      # Extract pattern ID(s) from the search results
      if 'patterns' in data and len(data['patterns']) > 0:
        for i in range(len(data['patterns'])):
            pattern_id = data['patterns'][i]['id']

            # Define URL to get pattern details
            pattern_url = f'https://api.ravelry.com/patterns/{pattern_id}.json'

            # Make the request for pattern details
            pattern_response = requests.get(pattern_url, auth=requests.auth.HTTPBasicAuth(authUsername, authPassword))
            pattern_response.close()

            # If patterns found, collect the necessary yarn info
            if pattern_response.status_code == 200:
                pattern_data = pattern_response.json()
                try:
                  #print(json.dumps(json.loads(pattern_response.text), indent=4))
                  yarnData = pattern_data['pattern']['yarn_weight']
                  #Extract the two main categories
                  cats = pattern_data['pattern']['pattern_categories']
                  categories = []
                  for cat in cats:
                    categories.append(cat['name'])
                    categories.append(cat['parent']['name'])

                  # Extract yardage for minimum/maximums
                  yardage = ['0', '0']
                  extracted = re.findall(r'\d+', pattern_data['pattern']['yardage_description'])

                  if len(extracted) == 2:
                    yardage = extracted
                  elif len(extracted) == 1:
                    yardage[0] = extracted[0]
                    yardage[1] = extracted[0]

                  # Get project name and add all to dataframe
                  yarndf = pd.concat([yarndf, pd.DataFrame([{'Project name': pattern_data['pattern']['name'],
                                                            'Yarn id': yarnData['id'],
                                                            'Crochet gauge': yarnData['crochet_gauge'],
                                                            'Knit gauge': yarnData['knit_gauge'],
                                                            'Yarn name': yarnData['name'],
                                                            'ply': yarnData['ply'], 'wpi': yarnData['wpi'],
                                                            'Min yardage': int(yardage[0]),
                                                            'Max yardage': int(yardage[1]),
                                                            'Categories': categories}])], ignore_index=True)

                  # Get key information from pattern details for recommender
                  keydf = pd.concat([keydf, pd.DataFrame([{'Project name': pattern_data['pattern']['name'],
                                                          'Project id': pattern_data['pattern']['id'],
                                                          'Difficulty average': pattern_data['pattern']['difficulty_average'],
                                                          'UK': pattern_data['pattern']['has_uk_terminology'],
                                                          'US' : pattern_data['pattern']['has_us_terminology']}])], ignore_index=True)
                except:
                  print("No yarn weight recorded")
      else:
        print("No patterns found")
  else:
    print("Unable to access patterns")

  keydf = keydf.merge(yarndf, on='Project name')
  print(keydf)
  return keydf


def fetchInventory():
  # Open stored inventory into dataframe
  with open("inventory.json", "r") as file:
    inventory = json.load(file)
    inv = pd.DataFrame(inventory)
    groupedInv = inv.groupby(['Yarn name','ply', 'wpi', 'Total yardage']).agg(
        min_yardage=('Total yardage', 'min'),
        max_yardage=('Total yardage', 'max')).reset_index()

    print(groupedInv)
    return groupedInv

def filterPatterns(groupedInv, patterns):
  # Filters patterns for yarn available in inventory
  filtereddf = patterns[patterns['Yarn name'].isin(groupedInv['Yarn name'])]
  finaldf = pd.DataFrame(columns = patterns.columns)
  for idx, row in filtereddf.iterrows():
    rowYardage = row['Max yardage']
    maxYardage = int(max(groupedInv.loc[groupedInv['Yarn name'] == row['Yarn name'], 'max_yardage'].values))

    if (rowYardage > 0) & (rowYardage <= maxYardage):
      # Directly adds if empty for future compatibility
      if finaldf.empty:
        finaldf = pd.DataFrame([row])
      else:
        finaldf = pd.concat([finaldf, pd.DataFrame([row])], ignore_index = True)

  print(finaldf)

patterns = getPatterns()


No yarn weight recorded
No yarn weight recorded
No yarn weight recorded
                               Project name  Project id  Difficulty average  \
0                      Little Stella Beanie     7406463            0.000000   
1                              Secret Paths      707741            3.074830   
2                 Simple Seed Stitch Beanie      885214            2.670290   
3            Nature's Walk Standard Blanket      957795            3.536508   
4                Sweet Summer Sunflower Bag      958997            2.851852   
5                         Dandy Dog Sweater     1001792            2.796460   
6               Cozy Granny Square Cardigan     1248113            2.549020   
7                                  Tiny-Rex     1262289            2.739130   
8                       Cozy Mosaic Cuddles     1264336            3.837209   
9                            Sundance Throw     1316258            2.333333   
10                             Best Bandana     1335606    

In [101]:
# Text pre-processing before recommender
chars = r'[^\w\s]' # Code for removing special characters
patterns['Project name'] = patterns['Project name'].replace(chars, '', regex=True)

# Get current yarn inventory and filter through patterns
groupedInv = fetchInventory()
filterPatterns(groupedInv, patterns)

   Yarn name  ply  wpi  Total yardage  min_yardage  max_yardage
0         DK    8   11            500          500          500
1         DK    8   12            200          200          200
2  Fingering    4   14            800          800          800
                 Project name  Project id  Difficulty average     UK    US  \
0       Ribbed Cat Ear Beanie     7307711            2.625000   None  True   
1  Ruffled Feathers Scrunchie     7380256            1.200000   None  True   
2  Summertime Sadness Coaster     7388327            1.625000   None  True   
3    HILADO HELADO Bucket Hat     7408009            0.000000  False  True   
4               Scarab Beetle     7414106            0.000000   None  True   
5       Steppingstone Pumpkin     7369191            4.833333   None  True   
6         Colourful Mini Dino     1080000            2.298507   True  None   

   Yarn id Crochet gauge Knit gauge Yarn name ply wpi  Min yardage  \
0       11          None         22        DK   8

In [108]:
### Basic recommender ###
# Take the last 5 patterns saved by the user and use the average calculated metrics to then recommend
# In release, should use the saved and completed patterns
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.metrics.pairwise import cosine_similarity


def readSaved():
  with open("saved.json", "r") as file:
    savedPatterns = json.load(file)
    saved = pd.DataFrame(savedPatterns)
    return saved

def vectorize(names, bow, vectorizer):
  tfidfTransformer = TfidfTransformer()
  tfidfMatrix = tfidfTransformer.fit_transform(bow)
  columnNames = vectorizer.get_feature_names_out() # Gets words for column naming

  return pd.DataFrame(tfidfMatrix.toarray())

def transformPatterns(saved, patterns):
  savedNames = saved['Project name'].tolist()
  patternNames = patterns['Project name'].tolist()
  features = ['Project name', 'Difficulty average', 'US', 'UK', "Categories"]

  savedCombined = saved.copy()
  patternsCombined = patterns.copy()

  def combineFeatures(data):
    return data['Project name'] + ' ' + str(data['Difficulty average']) + ' ' + str(data['US']) + ' ' + str(data['UK']) + ' ' + str(data['Categories'][0]) + ' ' + str(data["Categories"][1])

  for feature in features:
    savedCombined[feature] = savedCombined[feature].fillna('')
    patternsCombined[feature] = patternsCombined[feature].fillna('')

  savedCombined['combinedFeatures'] = savedCombined.apply(combineFeatures, axis=1)
  patternsCombined['combinedFeatures'] = patternsCombined.apply(combineFeatures, axis=1)
  # Combines all words so features from saved and patterns are used in the vectorising process
  allCombined = pd.concat([savedCombined["combinedFeatures"], patternsCombined["combinedFeatures"]])

  vectorizer = CountVectorizer()
  bagOfWordsMatrix = vectorizer.fit_transform(allCombined)

  # Splits the combined vectorizing to compare
  savedMatrix = vectorize(savedNames, bagOfWordsMatrix[:len(saved)], vectorizer)
  patternMatrix = vectorize(patternNames, bagOfWordsMatrix[len(saved):], vectorizer)

  # Finds similaritys
  similar = cosine_similarity(patternMatrix, savedMatrix)
  similarDf = pd.DataFrame(similar, index = patternNames, columns = savedNames)
  print(similarDf)


saved = readSaved()
transformPatterns(saved, patterns)


                                       Color Block Market Bag  \
Little Stella Beanie                                 0.009103   
Secret Paths                                         0.009443   
Simple Seed Stitch Beanie                            0.007896   
Natures Walk Standard Blanket                        0.016611   
Sweet Summer Sunflower Bag                           0.435727   
Dandy Dog Sweater                                    0.009468   
Cozy Granny Square Cardigan                          0.008665   
TinyRex                                              0.012265   
Cozy Mosaic Cuddles                                  0.009608   
Sundance Throw                                       0.009365   
Best Bandana                                         0.010505   
Ribbed Cat Ear Beanie                                0.007961   
Athabasca Falls Childrens Pullover                   0.007559   
Moonstone Rose Afghan Square                         0.083685   
Ruffled Feathers Scrunchi

Notes for retrieving names for the recommender
*   Remove any symbols from pattern names cause having " causes issues with the json
*   Any missing data should be 0 or none
* Probably include links to the projects when saving to the json


