# Research Question Analysis

This is the third notebook of three (3/3). This notebook calculates metrics to evaluate the clusters from the previous notebook.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# import clusters from previous notebook here
# Loads the output of notebook 2/3 into the module-level `df`. The helper
# functions defined further down use this object as the default value of their
# `df` parameter, so this cell must run before those definitions.
# NOTE(review): unpickling can execute arbitrary code -- only load a pickle
# file that was produced by a trusted run of the previous notebook.
df = pd.read_pickle('Notebook2_Output.pkl')
# Preview the first three rows to confirm the expected cluster columns exist.
df.head(3)

Unnamed: 0,id,prompt,small_big_tie,small_beat_big,winner_tie,winner_model_a,model_a_big,model_a,winner_model_b,model_b_big,model_b,reduced_2,kmeans_cluster_id,kmeans_cluster_label,bertopic_cluster_id,bertopic_cluster_label,grid_cluster_id,grid_cluster_label
0,65089,explain function calling. how would you call a...,0,0,1,0,0,gpt-3.5-turbo-0613,0,0,mistral-medium,"[4.365998268127441, 3.940767526626587]",146,Programming logic and algorithm generation,4,Python Programming and Coding Tasks,7.0,"Programming, Tech Support, and Algorithm Guidance"
1,96401,How can I create a test set for a very rare ca...,0,0,0,1,0,llama-2-13b-chat,0,0,mistral-7b-instruct,"[4.172776699066162, 1.9042508602142334]",166,"Machine Learning, Programming, and Technical E...",-1,,6.0,"Tech, Engineering, and Practical Problem-Solving"
2,198779,What is the best way to travel from Tel-Aviv t...,0,0,0,0,0,koala-13b,1,0,gpt-3.5-turbo-0314,"[6.135153293609619, -2.5313286781311035]",164,Travel Planning and Recommendations,23,Travel Planning and Itinerary Suggestions,8.0,"Geopolitics, History, and Trivia"


## Common Helper Functions

In [None]:
from os import readlink
# extract percentage of ties for each cluster

def calculate_tie_percentages(cluster_column, tie_col, min_prompts, df=df):
  """Summarises how often `tie_col` is set within each sufficiently large cluster.

  Args:
    cluster_column: Name of the column holding the cluster ids to group by.
    tie_col: Name of the column that is summed per cluster (e.g. a 0/1 tie flag).
    min_prompts: Clusters with fewer rows than this are excluded.
    df: Pandas DataFrame containing the data. The default is the module-level
      `df` as it existed when this function was defined.

  Returns:
    A dictionary keyed by cluster id. Each value is a dictionary with:
      "tie_percentage": sum / count * 100, rounded to two decimals,
      "ties": the per-cluster sum of `tie_col`,
      "total_comparisons": the per-cluster count of `tie_col` values.
  """
  def has_enough_prompts(group):
    # Cluster size is measured in rows, independent of the tie column.
    return len(group) >= min_prompts

  # Keep only the rows that belong to clusters meeting the size threshold.
  kept_rows = df.groupby(cluster_column).filter(has_enough_prompts)

  # One row per surviving cluster with the tie total and the number of values.
  per_cluster = kept_rows.groupby(cluster_column)[tie_col].agg(['sum', 'count'])

  return {
      cluster_id: {
          "tie_percentage": round((counts['sum'] / counts['count']) * 100, 2),
          "ties": counts['sum'],
          "total_comparisons": counts['count']
      }
      for cluster_id, counts in per_cluster.iterrows()
  }


def get_top_ties(tie_percentages, method, n=15, df=df):
  """Prints and returns the n clusters with the highest tie percentages.

  Args:
    tie_percentages: Dictionary keyed by cluster id whose values are dictionaries
      with the keys "tie_percentage", "ties" and "total_comparisons" (the
      structure returned by calculate_tie_percentages).
    method: Name of the method used to cluster the data. Cluster labels are read
      from the f'{method}_cluster_id' and f'{method}_cluster_label' columns.
    n: Maximum number of clusters to report.
    df: Pandas DataFrame used to look up the label of each cluster. The default
      is the module-level `df` as it existed when this function was defined.

  Returns:
    A list of at most n (cluster_id, stats) tuples, sorted from the highest to
    the lowest "tie_percentage"; `stats` is the per-cluster dictionary taken
    from `tie_percentages`.
  """
  id_col = f'{method}_cluster_id'
  label_col = f'{method}_cluster_label'

  # sort the dictionary by tie percentage (descending) and keep the first n
  top_ties = sorted(tie_percentages.items(), key=lambda x: x[1]['tie_percentage'], reverse=True)[:n]
  print(f"*Top {n} topics with the highest percentages* \n")
  for rank, (cluster_id, stats) in enumerate(top_ties, start=1):
    # The label of the first row belonging to the cluster is used as its name.
    label = df.loc[df[id_col] == cluster_id, label_col].iloc[0]
    print(f"{rank}. Cluster {cluster_id}: {label}")
    print(f"   {stats['tie_percentage']}% ties ({stats['ties']} ties/ {stats['total_comparisons']} prompts)")
    print()
  return top_ties



def get_bottom_ties(tie_percentages, method, n=15, df=df):
  """Prints and returns the n clusters with the lowest tie percentages.

  Args:
    tie_percentages: Dictionary keyed by cluster id whose values are dictionaries
      with the keys "tie_percentage", "ties" and "total_comparisons" (the
      structure returned by calculate_tie_percentages).
    method: Name of the method used to cluster the data. Cluster labels are read
      from the f'{method}_cluster_id' and f'{method}_cluster_label' columns.
    n: Maximum number of clusters to report.
    df: Pandas DataFrame used to look up the label of each cluster. The default
      is the module-level `df` as it existed when this function was defined.

  Returns:
    A list of at most n (cluster_id, stats) tuples, sorted from the lowest to
    the highest "tie_percentage"; `stats` is the per-cluster dictionary taken
    from `tie_percentages`.
  """
  id_col = f'{method}_cluster_id'
  label_col = f'{method}_cluster_label'

  # sort the dictionary by tie percentage (ascending) and keep the first n
  bottom_ties = sorted(tie_percentages.items(), key=lambda x: x[1]['tie_percentage'])[:n]
  print(f"*Top {n} topics with the lowest percentages* \n")
  for rank, (cluster_id, stats) in enumerate(bottom_ties, start=1):
    # The label of the first row belonging to the cluster is used as its name.
    label = df.loc[df[id_col] == cluster_id, label_col].iloc[0]
    print(f"{rank}. Cluster {cluster_id}: {label}")
    print(f"   {stats['tie_percentage']}% ties ({stats['ties']} ties/ {stats['total_comparisons']} prompts)")
    print()
  return bottom_ties

def line_break():
  """Prints a blank line, a row of '=' characters, and another blank line."""
  # An empty string on either side of the rule yields the surrounding blank lines.
  print('', '================================================', '', sep='\n')

# Research Question 1
Are there topics where humans are more likely to choose a winner/loser? In other words, what topics have the most and least ties as decided by users?

In [None]:
def rq1_analysis(method, df=df, min_prompts=150, n=15):
  """Prints the clusters with the highest and the lowest tie percentages.

  Ties are taken from the 'winner_tie' column and grouped by the
  f'{method}_cluster_id' column.

  Args:
    method: Name of the method used to cluster the data.
    df: Pandas DataFrame containing the data. It is forwarded to every helper,
      so passing a different DataFrame changes both the statistics and the
      label lookup. The default is the module-level `df` as it existed when
      this function was defined.
    min_prompts: Minimum number of prompts a cluster needs to be included.
    n: Number of clusters to print in each of the two rankings.
  """
  tie_col = 'winner_tie'
  # Pass df explicitly: the helpers would otherwise fall back to their own
  # default DataFrame and silently ignore the one given to this function.
  ties = calculate_tie_percentages(f'{method}_cluster_id', tie_col, min_prompts, df=df)
  get_top_ties(ties, method, n=n, df=df)
  line_break()
  get_bottom_ties(ties, method, n=n, df=df)


## K-Means Results
Top 15 and bottom 15 clusters based on percentage of ties vs wins by any model.

In [None]:
rq1_analysis('kmeans')

*Top 15 topics with the highest percentages* 

1. Cluster 11: Family relationship puzzles
   59.21% ties (164 ties/ 277 prompts)

2. Cluster 52: Word puzzles and letter-based tasks
   49.11% ties (220 ties/ 448 prompts)

3. Cluster 137: Medical and Health-related Questions
   47.18% ties (142 ties/ 301 prompts)

4. Cluster 10: Mathematical problem-solving and computations
   46.32% ties (346 ties/ 747 prompts)

5. Cluster 150: Logical reasoning and arithmetic puzzles
   45.07% ties (201 ties/ 446 prompts)

6. Cluster 98: Physical Scenarios & Problem Solving
   44.98% ties (130 ties/ 289 prompts)

7. Cluster 67: Family and relationship riddles
   40.49% ties (83 ties/ 205 prompts)

8. Cluster 143: Dialogue Structure and Storytelling
   40.34% ties (96 ties/ 238 prompts)

9. Cluster 153: Physics, mechanics, and applied problem-solving
   40.09% ties (87 ties/ 217 prompts)

10. Cluster 42: Dates, time calculations, and basic date-related queries
   39.21% ties (129 ties/ 329 prompts)

11.

## BERTopic Results
Top and bottom clusters based on percentage of ties vs wins by any model.

In [None]:
rq1_analysis('bertopic')

*Top 15 topics with the highest percentages* 

1. Cluster 19: Mathematical Problem Solving and Calculations
   48.92% ties (113 ties/ 231 prompts)

2. Cluster 14: Simple Arithmetic and Word Problems
   45.26% ties (148 ties/ 327 prompts)

3. Cluster 27: Word Play and Linguistic Challenges
   44.25% ties (77 ties/ 174 prompts)

4. Cluster 16: Basic Math and Logic
   42.28% ties (115 ties/ 272 prompts)

5. Cluster 10: Jokes
   39.03% ties (153 ties/ 392 prompts)

6. Cluster 15: Time, Dates, and Riddles
   38.76% ties (119 ties/ 307 prompts)

7. Cluster 30: Opinion Statements and Logical Reasoning
   37.21% ties (64 ties/ 172 prompts)

8. Cluster 22: Chemistry Calculations and Experimental Procedures
   34.98% ties (78 ties/ 223 prompts)

9. Cluster 6: Programming, Code Explanation, and Debugging
   34.13% ties (172 ties/ 504 prompts)

10. Cluster 12: Sports, Athletes, and Competitions
   33.05% ties (116 ties/ 351 prompts)

11. Cluster 20: Creative Content and Image Generation Prompts
  

## 2D Grid Clustering Results
Top and bottom clusters based on percentage of ties vs wins by any model.

In [None]:
rq1_analysis('grid')

*Top 15 topics with the highest percentages* 

1. Cluster 2.0: Wordplay and Logical Challenges
   45.4% ties (301 ties/ 663 prompts)

2. Cluster 3.0: Time, Dates, and Logical Puzzles
   43.72% ties (160 ties/ 366 prompts)

3. Cluster 12.0: Health, Optometry, and General Knowledge
   43.28% ties (132 ties/ 305 prompts)

4. Cluster 11.0: Mathematics, Algorithms, and Coding Concepts
   39.12% ties (1032 ties/ 2638 prompts)

5. Cluster 13.0: Food, Health, and Puzzles
   32.0% ties (503 ties/ 1572 prompts)

6. Cluster 14.0: Science, Puzzles, and Thought Experiments
   31.11% ties (401 ties/ 1289 prompts)

7. Cluster 7.0: Programming, Tech Support, and Algorithm Guidance
   31.1% ties (1510 ties/ 4856 prompts)

8. Cluster 8.0: Geopolitics, History, and Trivia
   30.75% ties (520 ties/ 1691 prompts)

9. Cluster 0.0: Creative and Quirky Writing Prompts
   30.36% ties (585 ties/ 1927 prompts)

10. Cluster 4.0: General Knowledge and Historical Contextualization
   30.34% ties (1147 ties/ 3780 pr

## RQ1 Analysis


Overall Analysis (General Themes Generated by Chat GPT 4o)

**Overlaps in Top 15 Themes**

*   Mathematical and Logical Problem Solving
    *   K-Means:
      * Cluster 10: Mathematical problem-solving and computations
      * Cluster 150: Logical reasoning and arithmetic puzzles
      * Cluster 132: Logic Puzzles and Mathematical Reasoning
    * BERTopic:
      * Cluster 19: Mathematical Problem Solving and Calculations
      * Cluster 16: Basic Math and Logic
    * 2D Grid Clustering:
      * Cluster 11.0: Mathematics, Algorithms, and Coding Concepts

* Wordplay, Linguistics, and Logical Challenges
  * K-Means:
    * Cluster 52: Word puzzles and letter-based tasks
  * BERTopic:
    * Cluster 27: Word Play and Linguistic Challenges
  * 2D Grid Clustering:
    * Cluster 2.0: Wordplay and Logical Challenges

* Time, Dates, and Riddles
  * K-Means:
    * Cluster 42: Dates, time calculations, and basic date-related queries
  * BERTopic:
    * Cluster 15: Time, Dates, and Riddles
  * 2D Grid Clustering:
    * Cluster 3.0: Time, Dates, and Logical Puzzles

* Jokes and Creative Tasks
  * K-Means:
    * Cluster 6: Telling jokes
  * BERTopic:
    * Cluster 10: Jokes
  * 2D Grid Clustering:
    * Cluster 0.0: Creative and Quirky Writing Prompts

**Overlaps in Bottom 15 Themes**
* Travel and Recommendations
  * K-Means:
    * Cluster 164: Travel Planning and Recommendations
  * BERTopic:
    * Cluster 23: Travel Planning and Itinerary Suggestions

* Fitness, Health, and Diet
  * K-Means:
    * Cluster 176: Fitness, Diet, and Exercise Routines
  * BERTopic:
    * Cluster 32: Fitness, Exercise, and Health Plans

* Food and Recipes
  * K-Means:
    * Cluster 29: Food, recipes, and diet
  * BERTopic:
    * Cluster 2: Food, Cooking, and Recipe Creation
  * 2D Grid Clustering:
    * Cluster 13.0: Food, Health, and Puzzles

* Historical and Social Topics
  * K-Means:
    * Cluster 64: Cultural, historical, and social topics
  * BERTopic:
    * Cluster 34: History, Politics, and Warfare
  * 2D Grid Clustering:
    * Cluster 8.0: Geopolitics, History, and Trivia

* AI and Technology
  * K-Means:
    * Cluster 142: AI, Ethics, and Innovation
  * BERTopic:
    * Cluster 7: AI Applications, Ethics, and Business Strategy


Observations
Mathematical and Logical Challenges frequently appear among the top clusters with high tie percentages.

Practical and Real-World Scenarios, such as travel planning or fitness, dominate the bottom themes with low tie percentages.
Each clustering method adds a unique perspective, but strong thematic overlaps indicate similar categorization tendencies across clustering methods.

 - Analysis Questions:
    - Overall, what types of topics ranked highest/lowest in regard to tie likelihood?
    - Compare tie ratios/standing of clusters within similar groups
    - Give commentary on themes that appear in both high and low tie clusters
    - Give commentary on what it means for a theme to have many ties versus not having many ties.
    - Talk about caveats that could skew/make topics appear in multiple places (vague themes, similar themes split into multiple clusters, clusters based on prompt structure instead of theme, difficulty of problems, etc.)

### Cluster Theme Analysis for RQ1

**Themes Among Highest Tying Clusters:**

As stated above, the general themes found within the clusters with the largest ratios of ties were (mathematical and logical problem solving), (wordplay, linguistics, and logical challenges), (times, dates, and riddles), and (jokes and creative tasks). The high proportion of ties for these topics indicates that a large percentage of models perform similarly when given questions within these topics. This suggests to users that there is no need to search for a specialized model to handle questions related to these topics.

Regarding the topics themselves, they all seem to be topics with one definitive answer in most cases (such as math problems, word puzzles, and questions about times and dates). This indicates that a large percentage of the models in the dataset can answer these questions with similar degrees of accuracy, if we assume that a tie means both models returned the right answer or both returned the wrong answer.

The one topic that is more open-ended is Jokes and Creative Tasks. However, since the quality of jokes and creative tasks such as writing prompts can be very subjective, users may not have specific criteria in mind for rating a joke, other than whether the answer is related to the given prompt to a satisfactory degree. This may have led to an increase in ties. Since there is no right answer, this may indicate that the models all perform similarly at returning content that is relevant to the joke/creative prompts.

One potential caveat is that even among the clusters with the highest proportion of ties, the largest tie proportion for a single cluster is still below 60% (the cluster with the highest tie proportion overall is cluster 11 for K-Means, with 59.21% ties). This means that, even in these clusters, there is a clear winner for at least 40% of prompts. Furthermore, a majority of the highest cluster tie proportions are under 50%, meaning that over half of the time there is still a clear winner. A direction for further exploration would be to check whether a winning model can be identified for each cluster by finding which model ties or wins most often. Looking at the proportion of wins and ties could give further insight into whether any model is significantly better than the others for a specific topic.


**Themes Among Lowest Tying Clusters:**

It looks like the lowest-tying topics are more open-ended and could be approached in multiple ways. For example, Travel Recommendations might be subjective and rely on user preference. There isn't a fixed answer such as 2+2=4. Similarly, AI and Technology prompts tend to be more philosophical and might not have only one answer. Most of the least-tying themes show this property, implying that the models are less competent in answering less straightforward prompts.

Therefore, combining insights from multiple models for low-tying themes could offer more balanced responses.



# Research Question 2
We are not interested in when big/big or small/small models interact since they are so similar. For the context of LLM routing, we want to know when small models are as good as, or better than, big models.

In which clusters do small models tie with big models?

### Filtering for small model vs. large model matches only
Filtering the data to only include rows where big and small models are going head to head.

In [None]:
# Boolean mask selecting rows where exactly one of the two models is flagged
# as big: (a == 0 and b == 1) or (a == 1 and b == 0).
# presumably 1 means "big model" and 0 means "small model" -- confirm against
# the notebook that created the *_big columns.
small_big_match = (
    (df['model_a_big'] == 0) & (df['model_b_big'] == 1)
) | (
    (df['model_a_big'] == 1) & (df['model_b_big'] == 0)
)

# Subset used as the default DataFrame of the RQ2 helper functions below.
df2 = df[small_big_match]
# NOTE(review): DataFrame.size is the number of elements (rows x columns), not
# the number of matches; if the row count was intended, len(df2) would show it.
df2.size

361548

### Print Formatting Functions

In [None]:
def rq2_analysis(method, df=df2):
  """Prints the 15 clusters where small and big models tie most often.

  Uses the 'small_big_tie' column and only considers clusters with at least
  100 prompts.

  Args:
    method: Name of the method used to cluster the data.
    df: Pandas DataFrame containing the data. The default is the module-level
      `df2` as it existed when this function was defined.
  """
  cluster_col = f'{method}_cluster_id'
  tie_stats = calculate_tie_percentages(cluster_col, 'small_big_tie', 100, df=df)
  # Called for its printed report; the returned ranking is not needed here.
  get_top_ties(tie_stats, method, n=15, df=df)


def rq2_analysis_small_wins_over_big(method, df=df2):
  """Prints the 15 clusters where small models most often *win* against big models.

  Uses the 'small_beat_big' column and only considers clusters with at least
  100 prompts. The report is produced by get_top_ties, so each printed line
  still uses the word "ties" even though the counts here are small-model wins.

  Args:
    method: Name of the method used to cluster the data.
    df: Pandas DataFrame containing the data. The default is the module-level
      `df2` as it existed when this function was defined.
  """
  cluster_col = f'{method}_cluster_id'
  win_stats = calculate_tie_percentages(cluster_col, 'small_beat_big', 100, df=df)
  # Called for its printed report; the returned ranking is not needed here.
  get_top_ties(win_stats, method, n=15, df=df)

### K-Means Results
Top clusters based on percentage of **ties with small models and big models**.

In [None]:
rq2_analysis('kmeans')

*Top 15 topics with the highest percentages* 

1. Cluster 11: Family relationship puzzles
   57.6% ties (72 ties/ 125 prompts)

2. Cluster 52: Word puzzles and letter-based tasks
   45.81% ties (82 ties/ 179 prompts)

3. Cluster 10: Mathematical problem-solving and computations
   44.26% ties (131 ties/ 296 prompts)

4. Cluster 137: Medical and Health-related Questions
   43.85% ties (57 ties/ 130 prompts)

5. Cluster 150: Logical reasoning and arithmetic puzzles
   41.08% ties (76 ties/ 185 prompts)

6. Cluster 98: Physical Scenarios & Problem Solving
   40.5% ties (49 ties/ 121 prompts)

7. Cluster 6: Telling jokes
   37.33% ties (56 ties/ 150 prompts)

8. Cluster 151: Geography and demographic facts
   36.13% ties (43 ties/ 119 prompts)

9. Cluster 42: Dates, time calculations, and basic date-related queries
   35.92% ties (51 ties/ 142 prompts)

10. Cluster 13: Advanced mathematics and problem-solving
   35.19% ties (101 ties/ 287 prompts)

11. Cluster 66: Mathematics, logic, and t

#### K-Means Small Model Wins
Top clusters based on percentage of ***wins* by small models against big models**.

In [None]:
rq2_analysis_small_wins_over_big('kmeans')

*Top 15 topics with the highest percentages* 

1. Cluster 158: Relationships, personal dynamics, and sensitive topics
   33.6% ties (84 ties/ 250 prompts)

2. Cluster 23: Practical information & creative requests
   33.5% ties (67 ties/ 200 prompts)

3. Cluster 40: Human behavior, identity, and social issues
   31.34% ties (63 ties/ 201 prompts)

4. Cluster 9: Controversial, political, and historical inquiries
   31.25% ties (95 ties/ 304 prompts)

5. Cluster 2: Everyday decision-making and creative advice
   30.83% ties (41 ties/ 133 prompts)

6. Cluster 96: LLM Models & Machine Learning
   29.86% ties (66 ties/ 221 prompts)

7. Cluster 18: Professional communication and writing assistance
   29.79% ties (115 ties/ 386 prompts)

8. Cluster 175: Software Development and Technology
   29.79% ties (70 ties/ 235 prompts)

9. Cluster 81: Creative writing and character development
   28.81% ties (70 ties/ 243 prompts)

10. Cluster 36: Technology, data analysis, and business optimization
   

## BERTopic Results
Top clusters based on percentage of **ties with small models and big models**.

In [None]:
rq2_analysis('bertopic')

*Top 15 topics with the highest percentages* 

1. Cluster 14: Simple Arithmetic and Word Problems
   41.67% ties (55 ties/ 132 prompts)

2. Cluster 16: Basic Math and Logic
   38.21% ties (47 ties/ 123 prompts)

3. Cluster 10: Jokes
   37.42% ties (58 ties/ 155 prompts)

4. Cluster 15: Time, Dates, and Riddles
   34.88% ties (45 ties/ 129 prompts)

5. Cluster 12: Sports, Athletes, and Competitions
   34.33% ties (46 ties/ 134 prompts)

6. Cluster 11: Investment and Financial Strategies
   32.19% ties (47 ties/ 146 prompts)

7. Cluster 8: Creative Writing, Film, and Storytelling Concepts
   32.07% ties (59 ties/ 184 prompts)

8. Cluster 6: Programming, Code Explanation, and Debugging
   29.67% ties (62 ties/ 209 prompts)

9. Cluster 3: Music, Songwriting, and Lyrics Creation
   28.88% ties (67 ties/ 232 prompts)

10. Cluster -1: nan
   28.69% ties (3336 ties/ 11628 prompts)

11. Cluster 4: Python Programming and Coding Tasks
   28.51% ties (63 ties/ 221 prompts)

12. Cluster 0: Language

#### BERTopic Small Model Wins
Top clusters based on percentage of ***wins* by small models against big models**.



In [None]:
rq2_analysis_small_wins_over_big('bertopic')

*Top 15 topics with the highest percentages* 

1. Cluster 13: People, Personalities, and Identities
   29.6% ties (37 ties/ 125 prompts)

2. Cluster 7: AI Applications, Ethics, and Business Strategy
   29.19% ties (54 ties/ 185 prompts)

3. Cluster 8: Creative Writing, Film, and Storytelling Concepts
   28.8% ties (53 ties/ 184 prompts)

4. Cluster 1: Game Mechanics, Design, and Roleplaying
   28.52% ties (77 ties/ 270 prompts)

5. Cluster 5: Medical Conditions, Symptoms, and Treatments
   26.46% ties (59 ties/ 223 prompts)

6. Cluster 10: Jokes
   26.45% ties (41 ties/ 155 prompts)

7. Cluster 2: Food, Cooking, and Recipe Creation
   26.2% ties (71 ties/ 271 prompts)

8. Cluster 4: Python Programming and Coding Tasks
   25.79% ties (57 ties/ 221 prompts)

9. Cluster -1: nan
   25.53% ties (2969 ties/ 11628 prompts)

10. Cluster 3: Music, Songwriting, and Lyrics Creation
   25.0% ties (58 ties/ 232 prompts)

11. Cluster 0: Language Translation and Linguistic Questions
   23.3% ties (72

## 2D Grid Clustering Results
Top clusters based on percentage of **ties with small models and big models**.

In [None]:
rq2_analysis('grid')

*Top 15 topics with the highest percentages* 

1. Cluster 3.0: Time, Dates, and Logical Puzzles
   43.97% ties (62 ties/ 141 prompts)

2. Cluster 2.0: Wordplay and Logical Challenges
   41.13% ties (109 ties/ 265 prompts)

3. Cluster 12.0: Health, Optometry, and General Knowledge
   39.23% ties (51 ties/ 130 prompts)

4. Cluster 11.0: Mathematics, Algorithms, and Coding Concepts
   35.57% ties (387 ties/ 1088 prompts)

5. Cluster 13.0: Food, Health, and Puzzles
   30.96% ties (200 ties/ 646 prompts)

6. Cluster 4.0: General Knowledge and Historical Contextualization
   29.75% ties (449 ties/ 1509 prompts)

7. Cluster 8.0: Geopolitics, History, and Trivia
   29.36% ties (197 ties/ 671 prompts)

8. Cluster 1.0: Diverse Knowledge and Creative Tasks
   28.56% ties (357 ties/ 1250 prompts)

9. Cluster 10.0: Technical Concepts, Problem-Solving, and Practical Advice
   28.49% ties (465 ties/ 1632 prompts)

10. Cluster 14.0: Science, Puzzles, and Thought Experiments
   28.45% ties (138 ties/ 4

#### Grid Small Model Wins
Top clusters based on percentage of ***wins* by small models against big models**.

In [None]:
rq2_analysis_small_wins_over_big('grid')

*Top 15 topics with the highest percentages* 

1. Cluster 5.0: Creative Expression and Philosophical Inquiry
   28.2% ties (873 ties/ 3096 prompts)

2. Cluster 9.0: Practical Knowledge and Everyday Curiosities
   27.05% ties (475 ties/ 1756 prompts)

3. Cluster 4.0: General Knowledge and Historical Contextualization
   26.91% ties (406 ties/ 1509 prompts)

4. Cluster 6.0: Tech, Engineering, and Practical Problem-Solving
   26.81% ties (744 ties/ 2775 prompts)

5. Cluster 8.0: Geopolitics, History, and Trivia
   25.93% ties (174 ties/ 671 prompts)

6. Cluster 1.0: Diverse Knowledge and Creative Tasks
   25.04% ties (313 ties/ 1250 prompts)

7. Cluster 0.0: Creative and Quirky Writing Prompts
   24.87% ties (189 ties/ 760 prompts)

8. Cluster 12.0: Health, Optometry, and General Knowledge
   24.62% ties (32 ties/ 130 prompts)

9. Cluster 10.0: Technical Concepts, Problem-Solving, and Practical Advice
   23.41% ties (382 ties/ 1632 prompts)

10. Cluster 11.0: Mathematics, Algorithms, and 