In [79]:
!pip install python-dotenv
!git init
!touch .gitignore
!echo "info.env" >> .gitignore
!git check-ignore -v .env
!pip install flask flask-cors pyngrok



Reinitialized existing Git repository in /content/.git/


In [83]:
# Setup API for react app communication
from flask import Flask, jsonify, request
from flask_cors import CORS
from pyngrok import ngrok

# Load environment variables
load_dotenv("info.env")

app = Flask(__name__)
CORS(app)

@app.route('/data')
def data():
    return jsonify({"message": "Working!"})

# Set up ngrok tunnel
TOKEN = str(os.getenv("NGROK_TOKEN"))
ngrok.set_auth_token(TOKEN)
public_url = ngrok.connect(5000)
print(f"Public URL: {public_url}")



Public URL: NgrokTunnel: "https://c81c-35-186-191-34.ngrok-free.app" -> "http://localhost:5000"


In [None]:
import pandas as pd
import http.client
import json
import requests
import os
from dotenv import load_dotenv
import re

# Access credentials
authUsername = os.getenv("API_USERNAME")
authPassword = os.getenv("API_PASSWORD")

### Pre-filtering based on inventory ###
# Search based on specific filters
page = 8
page_size = 10
craft = 'crochet'
knit_gauge = 5
weight = 'DK'
query = 'dragon'

# Define URL for the API request
url = 'https://api.ravelry.com/patterns/search.json?page={}&page_size={}&craft={}'.format(page, page_size, craft)
# Make request
r = requests.get(url, auth=requests.auth.HTTPBasicAuth(authUsername, authPassword))
# Close connection
r.close()


def getPatterns():
  yarndf = pd.DataFrame()
  keydf = pd.DataFrame()
  if r.status_code == 200:
      data = r.json()
      # Extract pattern ID(s) from the search results
      if 'patterns' in data and len(data['patterns']) > 0:
        for i in range(len(data['patterns'])):
            pattern_id = data['patterns'][i]['id']

            # Define URL to get pattern details
            pattern_url = f'https://api.ravelry.com/patterns/{pattern_id}.json'

            # Make the request for pattern details
            pattern_response = requests.get(pattern_url, auth=requests.auth.HTTPBasicAuth(authUsername, authPassword))
            pattern_response.close()

            # If patterns found, collect the necessary yarn info
            if pattern_response.status_code == 200:
                pattern_data = pattern_response.json()
                try:
                  #print(json.dumps(json.loads(pattern_response.text), indent=4))
                  yarnData = pattern_data['pattern']['yarn_weight']
                  #Extract the two main categories
                  cats = pattern_data['pattern']['pattern_categories']
                  categories = []
                  for cat in cats:
                    categories.append(cat['name'])
                    categories.append(cat['parent']['name'])

                  # Extract yardage for minimum/maximums
                  yardage = ['0', '0']
                  extracted = re.findall(r'\d+', pattern_data['pattern']['yardage_description'])

                  if len(extracted) == 2:
                    yardage = extracted
                  elif len(extracted) == 1:
                    yardage[0] = extracted[0]
                    yardage[1] = extracted[0]

                  # Get project name and add all to dataframe
                  yarndf = pd.concat([yarndf, pd.DataFrame([{'Project name': pattern_data['pattern']['name'],
                                                            'Yarn id': yarnData['id'],
                                                            'Crochet gauge': yarnData['crochet_gauge'],
                                                            'Knit gauge': yarnData['knit_gauge'],
                                                            'Yarn name': yarnData['name'],
                                                            'ply': yarnData['ply'], 'wpi': yarnData['wpi'],
                                                            'Min yardage': int(yardage[0]),
                                                            'Max yardage': int(yardage[1]),
                                                            'Categories': categories}])], ignore_index=True)

                  # Get key information from pattern details for recommender
                  keydf = pd.concat([keydf, pd.DataFrame([{'Project name': pattern_data['pattern']['name'],
                                                          'Project id': pattern_data['pattern']['id'],
                                                          'Difficulty average': pattern_data['pattern']['difficulty_average'],
                                                          'UK': pattern_data['pattern']['has_uk_terminology'],
                                                          'US' : pattern_data['pattern']['has_us_terminology']}])], ignore_index=True)
                except:
                  print("")
      else:
        print("No patterns found")
  else:
    print("Unable to access patterns")

  keydf = keydf.merge(yarndf, on='Project name')
  print(keydf)
  return keydf


def fetchInventory():
  # Open stored inventory into dataframe
  with open("inventory.json", "r") as file:
    inventory = json.load(file)
    inv = pd.DataFrame(inventory)
    groupedInv = inv.groupby(['Yarn name','ply', 'wpi', 'Total yardage']).agg(
        min_yardage=('Total yardage', 'min'),
        max_yardage=('Total yardage', 'max')).reset_index()

    print(groupedInv)
    return groupedInv

def filterPatterns(groupedInv, patterns):
  # Filters patterns for yarn available in inventory
  filtereddf = patterns[patterns['Yarn name'].isin(groupedInv['Yarn name'])]
  finaldf = pd.DataFrame(columns = patterns.columns)
  for idx, row in filtereddf.iterrows():
    rowYardage = row['Max yardage']
    maxYardage = int(max(groupedInv.loc[groupedInv['Yarn name'] == row['Yarn name'], 'max_yardage'].values))

    if (rowYardage > 0) & (rowYardage <= maxYardage):
      # Directly adds if empty for future compatibility
      if finaldf.empty:
        finaldf = pd.DataFrame([row])
      else:
        finaldf = pd.concat([finaldf, pd.DataFrame([row])], ignore_index = True)

  print(finaldf)

patterns = getPatterns()

@app.route('/fetchpatterns', methods=['GET'])
def fetch_patterns():
    print("/patterns route was hit")
    patterns = getPatterns()
    return jsonify(patterns.to_dict(orient='records'))

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)



                  Project name  Project id  Difficulty average     UK    US  \
0           Mockingjay Coaster     7418434            2.666667   None  True   
1                      Lei Bag     7422172            0.000000  False  True   
2                       Ariana     1182504            4.656934   None  True   
3                  Indivisible     7304441            4.276596   None  True   
4  Mini Carrot Kawaii Cuddler®     7420746            0.000000   None  True   
5                 Gift Coaster     7386570            4.260870   None  True   
6           Honeycomb Tank Top     7421964            3.000000   None  True   
7                 Helios Ruana     7421147            0.000000   None  True   
8           Agnes Sweater Vest     1305044            3.092105   None  True   
9     Athabasca Falls Pullover     1279615            2.422222   None  True   

   Yarn id Crochet gauge Knit gauge  Yarn name ply wpi  Min yardage  \
0       11          None         22         DK   8  11     

 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [61]:
# Text pre-processing before recommender
chars = r'[^\w\s]' # Code for removing special characters
patterns['Project name'] = patterns['Project name'].replace(chars, '', regex=True)

# Get current yarn inventory and filter through patterns
groupedInv = fetchInventory()
filterPatterns(groupedInv, patterns)

   Yarn name  ply  wpi  Total yardage  min_yardage  max_yardage
0         DK    8   11            500          500          500
1         DK    8   12            200          200          200
2  Fingering    4   14            800          800          800
         Project name  Project id  Difficulty average     UK    US  Yarn id  \
0  Mockingjay Coaster     7418434            2.666667   None  True       11   
1             Lei Bag     7422172            0.000000  False  True       11   
2        Gift Coaster     7386570            4.260870   None  True        5   

  Crochet gauge Knit gauge  Yarn name ply wpi  Min yardage  Max yardage  \
0          None         22         DK   8  11           30           40   
1          None         22         DK   8  11          416          437   
2                       28  Fingering   4  14           60           66   

          Categories  
0    [Coaster, Home]  
1   [Messenger, Bag]  
2  [Potholder, Home]  


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [4]:
### Basic recommender ###
# Take the last 5 patterns saved by the user and use the average calculated metrics to then recommend
# In release, should use the saved and completed patterns
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Gets patterns users have saved/started making
def readSaved():
  with open("saved.json", "r") as file:
    savedPatterns = json.load(file)
    saved = pd.DataFrame(savedPatterns)
    return saved

# Vectorizes features
def vectorize(names, bow, vectorizer):
  tfidfTransformer = TfidfTransformer()
  tfidfMatrix = tfidfTransformer.fit_transform(bow)
  return pd.DataFrame(tfidfMatrix.toarray())

# Takes features of saved patterns and finds the similairty against other found patterns
def transformPatterns(saved, patterns):
  savedNames = saved['Project name'].tolist()
  patternNames = patterns['Project name'].tolist()
  #Features to be used for comparing patterns
  features = ['Project name', 'Difficulty average', 'US', 'UK', "Categories"]

  savedCombined = saved.copy()
  patternsCombined = patterns.copy()

  # Features used for comparing patterns
  def combineFeatures(data):
    return data['Project name'] + ' ' + str(data['Difficulty average']) + ' ' + str(data['US']) + ' ' + str(data['UK']) + ' ' + str(data['Categories'][0]) + ' ' + str(data["Categories"][1])

  # Fills in any missing data
  for feature in features:
    savedCombined[feature] = savedCombined[feature].fillna('')
    patternsCombined[feature] = patternsCombined[feature].fillna('')

  savedCombined['combinedFeatures'] = savedCombined.apply(combineFeatures, axis=1)
  patternsCombined['combinedFeatures'] = patternsCombined.apply(combineFeatures, axis=1)
  # Combines all words so features from saved and patterns are used in the vectorising process
  allCombined = pd.concat([savedCombined["combinedFeatures"], patternsCombined["combinedFeatures"]])

  vectorizer = CountVectorizer()
  bagOfWordsMatrix = vectorizer.fit_transform(allCombined)

  # Splits the combined vectorizing to compare
  savedMatrix = vectorize(savedNames, bagOfWordsMatrix[:len(saved)], vectorizer)
  patternMatrix = vectorize(patternNames, bagOfWordsMatrix[len(saved):], vectorizer)

  # Finds similaritys
  similar = cosine_similarity(patternMatrix, savedMatrix)
  similarDf = pd.DataFrame(similar, index = patternNames, columns = savedNames)
  # Totals similarity across all saved patterns to find the best overall recommended ones
  totals = pd.DataFrame(similarDf.sum(axis=1))
  totals.sort_values(by=[0], ascending=False, inplace=True)
  # Displays top 10 recommended patterns
  print(totals.head(10))

saved = readSaved()
transformPatterns(saved, patterns)


KeyError: 'Categories'

Notes for retrieving names for the recommender
*   Remove any symbols from pattern names cause having " causes issues with the json
*   Any missing data should be 0 or none
* Probably include links to the projects when saving to the json


