In [None]:
def calculate_edit_distance(str1: str, str2: str) -> int:
    """Return the case-insensitive Levenshtein distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``str1`` into ``str2``.
    Both inputs are lowercased before comparison.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        The edit distance as a non-negative integer (0 when the lowercased
        strings are identical).
    """
    # Lowercase BEFORE measuring: lower() can change a string's length
    # (e.g. "\u0130" becomes "i" + a combining dot), so lengths taken from the
    # raw inputs would size the table for the wrong strings.
    str1 = str1.lower()
    str2 = str2.lower()
    m, n = len(str1), len(str2)

    # dp[i][j] = distance between the first i chars of str1 and first j of str2
    dp = [[0] * (n + 1) for _ in range(m + 1)]

    # Turning a prefix into the empty string (or back) costs its length
    for i in range(m + 1):
        dp[i][0] = i
    for j in range(n + 1):
        dp[0][j] = j

    # Fill the matrix
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if str1[i - 1] == str2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
            else:
                dp[i][j] = 1 + min(
                    dp[i - 1][j],      # deletion
                    dp[i][j - 1],      # insertion
                    dp[i - 1][j - 1],  # substitution
                )

    return dp[m][n]

In [5]:
calculate_edit_distance("How you doing","how you doin'")

1

In [1]:
import json

def save_to_file(filename, data):
    """Write ``data`` to ``filename`` as JSON, replacing any existing content.

    The document is pretty-printed with a four-space indent.
    """
    with open(filename, mode='w') as handle:
        json.dump(data, handle, indent=4)


In [4]:
def read_from_file(filename):
    """Load the JSON document stored in ``filename``.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The parsed JSON value, or an empty dict when the file does not exist,
        is empty / whitespace-only, or holds a falsy JSON value (``null``,
        ``{}``, ``[]`` ...).

    Raises:
        json.JSONDecodeError: If the file has non-blank content that is not
            valid JSON. This is deliberately not swallowed, so corrupt data
            is noticed rather than silently replaced by an empty dict.
    """
    try:
        with open(filename, 'r') as file:
            raw_text = file.read()
    except FileNotFoundError:
        # If the file doesn't exist, return an empty dictionary
        return {}

    # A blank file carries no data yet; treat it like a missing file instead
    # of letting the JSON parser fail on it.
    if not raw_text.strip():
        return {}

    return json.loads(raw_text) or {}


In [6]:
qa_data = read_from_file("qa_pairs.json")  # Load existing Q/A pairs from the file

In [7]:
type(qa_data)

dict

In [None]:
def add_qa_pair(qa_data, question, answer, filename="qa_pairs.json"):
    """Add or update one question/answer pair and persist the whole mapping.

    Args:
        qa_data: Mapping of question -> answer; it is modified in place.
        question: Key to add or overwrite.
        answer: Value stored for ``question``.
        filename: File the updated mapping is saved to. Defaults to
            "qa_pairs.json", the path that was previously hard-coded.
    """
    qa_data[question] = answer  # Add or update the question-answer pair
    save_to_file(filename, qa_data)  # Save the updated dictionary to the file


In [None]:
def qa_main():
    """Run an interactive question/answer loop on the console.

    Stored Q/A pairs are loaded from "qa_pairs.json" via ``read_from_file``.
    Each line typed at the "User: " prompt is looked up verbatim; the stored
    answer is printed, or a fallback message when the question is unknown.

    The loop ends when the input stream is closed or interrupted (EOF /
    Ctrl-C / kernel interrupt), or when the user types "quit" or "exit"
    (case-insensitive). A stored question always takes precedence over the
    exit keywords.
    """
    qa_data = read_from_file("qa_pairs.json")  # Load existing Q/A pairs
    fallback = "I don't know how to respond to that. Ask something else."
    exit_words = ("quit", "exit")

    # Conversation loop
    while True:
        try:
            question = input("User: ")
        except (EOFError, KeyboardInterrupt):
            # No more input is coming: stop cleanly instead of raising
            break

        # Exit keywords only apply when they are not themselves stored questions
        if question not in qa_data and question.strip().lower() in exit_words:
            break

        answer = qa_data[question] if question in qa_data else fallback

        print("BOT: ",answer)

qa_main()

In [44]:
def get_athlete_bio(athlete_name):
    """Look up an athlete in ``bios_df`` by exact name.

    Returns the first row whose 'name' equals ``athlete_name``, or the
    string "Athlete not found." when no row matches.
    """
    name_matches = bios_df['name'] == athlete_name
    matching_rows = bios_df.loc[name_matches]
    if len(matching_rows) == 0:
        return "Athlete not found."
    return matching_rows.iloc[0]

In [46]:
print(get_athlete_bio("Awais"))

Athlete not found.


In [4]:
bios_df.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25


In [19]:
results_df['medal'] = results_df['medal'].fillna("No Medal")

In [29]:
results_df['team'] = results_df['team'].fillna("No Team")

In [31]:
results_df.to_csv('./chatbot-data/results.csv', index=False)

In [38]:
# Get the column means, rounded to 1 decimal place
height_mean = round(bios_df['height_cm'].mean(), 1)
weight_mean = round(bios_df['weight_kg'].mean(), 1)

# Replace NaN values with the rounded means
bios_df['height_cm'] = bios_df['height_cm'].fillna(height_mean)
bios_df['weight_kg'] = bios_df['weight_kg'].fillna(weight_mean)

# Show the rounded means that were used as fill values
print("Average height:", height_mean)  # 176.3 in the recorded run
print("Average weight:", weight_mean)  # 71.9 in the recorded run

Average height: 176.3
Average weight: 71.9


In [40]:
bios_df = bios_df.dropna(subset=['born_date'])

In [41]:
# NOTE(review): this keeps only rows that have a died_date. If a missing
# died_date means the athlete is still alive (presumably — confirm), every
# living athlete is removed from bios_df here; verify that this is intended.
bios_df = bios_df.dropna(subset=['died_date'])

In [43]:
bios_df.shape

(33847, 10)

In [48]:
results_df = results_df.dropna(subset=['place'])

# DROP ALL ROWS WITH NAN VALUES

```python
for column in results_df.columns:
    results_df = results_df.dropna(subset=[column])
```

--- 

> Or use the approach below in one line

In [49]:
# Count the rows BEFORE dropping, so the report reflects what dropna() removes.
# (Measuring after results_df has already been cleaned always reports 0.)
original_rows = len(results_df)

# Drop all rows with any NaN values in one go
cleaned_df = results_df.dropna()
dropped_rows = original_rows - len(cleaned_df)
print(f"Dropped {dropped_rows} rows out of {original_rows}")

# Keep the cleaned frame under the working name
results_df = cleaned_df

Dropped 0 rows out of 281518


In [51]:
results_df.to_csv('./chatbot-data/results.csv', index=False)
bios_df.to_csv('./chatbot-data/bios.csv', index=False)

### Who won medals in tennis in 1924?

> This will return all results in sentence form.

```python
medalists = interpret_question("Who won medals in tennis in 1924?")
print(medalists.columns,type(medalists))
for index, medalist in medalists.iterrows():
    print(f"{index}. {medalist['name']} from {medalist['NOC']} won a {medalist['medal']} medal in {medalist['event']} ({medalist['discipline']}).")
```        

### Which athletes competed in Tennis in 1920?
---
> 

```python
athletes_by_sport=(interpret_question("Which athletes competed in Tennis in 1920?")) 
counter=1
for index, athletes in athletes_by_sport.iterrows():
    print(f"{counter}: Athlete id={index} -> {athletes['name']}  participated from {athletes['NOC']}.")
    counter+=1
```    

In [1]:
import pandas as pd
bios_df= pd.read_csv("./clean-data/bios.csv")
results_df=pd.read_csv("./clean-data/results.csv")


In [2]:
results_df.columns

Index(['year', 'type', 'discipline', 'event', 'as', 'athlete_id', 'noc',
       'team', 'place', 'tied', 'medal'],
      dtype='object')


---

### Give medals won by Jean Borotra?


> Code: 
```python
    all_medals = (interpret_question("Give medals won by Jean Borotra"))
    print(f"Following medals are won by {all_medals['name']}: ")
    for index, medal in all_medals["medals"].iterrows():
        print(f"{medal['medal'] if medal['medal'] != 'No Medal' else 'Silver'} medal in {medal['discipline']} in the year {int(medal['year'])} at {medal['event']}.")
```    

### How many medals won by France?

> Function used: `get_country_performance`

> Code:
```python
medals_count=interpret_question("How many medals won by France?")
print(f"Silver medals = {medals_count.get('Silver',0)}\nGold medals = {medals_count.get('Gold',0)}\nBronze medals = {medals_count.get('Bronze',0)}")
```
> 

### Show bio data of Jean Borotra.  

> Get Bio data of athlete
--- 
> Code:
```python
    athlete=interpret_question("Show bio data of Jean Borotra.")
    print(f"Athlete ID: {athlete['athlete_id']}")
    print(f"Name: {athlete['name']}")
    print(f"Date of Birth: {athlete['born_date']} (Born in {athlete['born_city']}, {athlete['born_region']}, {athlete['born_country']})")
    print(f"Height: {athlete['height_cm']} cm")
    print(f"Weight: {athlete['weight_kg']} kg")
    print(f"Date of Death: {athlete['died_date']}")
```

In [2]:
import queries as actuators

In [4]:
actuators.get_medalists_by_year("1995")

Error: No data available for year 1995


False