In [None]:
from reprolab.experiment import start_experiment, end_experiment
# Start the reprolab experiment run. According to its own log output, the
# first step it performs is saving all Jupyter notebooks.
start_experiment()


2025-07-09 23:07:18 - INFO - NumExpr defaulting to 8 threads.
2025-07-09 23:07:19 - INFO - Starting experiment process
2025-07-09 23:07:19 - INFO - Step 1: Saving all notebooks
2025-07-09 23:07:19 - INFO - Attempting to save all Jupyter notebooks...
2025-07-09 23:07:19 - INFO - ipylab save command executed successfully
2025-07-09 23:07:19 - INFO - nbformat processing completed for 4 notebooks


In [None]:
# Cell 1: Loading CSV data from local file
!pip install pandas
import pandas as pd
from reprolab.experiment import persistio

@persistio()
def load_csv_data():
    """Read ``books.csv`` from the working directory.

    Returns:
        pandas.DataFrame: the parsed contents of the CSV file.
    """
    # NOTE(review): wrapped in reprolab's persistio(); presumably the returned
    # frame is cached between runs -- confirm against the reprolab docs.
    books = pd.read_csv('books.csv')
    return books

# Load the CSV once, then print a heading followed by the first rows
# (one print call, newline-separated, emits the same two pieces of text).
books_csv = load_csv_data()
print("Initial CSV data:", books_csv.head(), sep="\n")


In [None]:
# Cell 2: Setting API key

@persistio()
def get_openlibrary_api_key():
    """Return the API key stored in ``OPENLIBRARY_API_KEY.txt``.

    The file is opened relative to the current working directory and any
    leading/trailing whitespace (including the final newline) is removed.

    Returns:
        str: the stripped file contents.
    """
    # NOTE(review): this function is wrapped in persistio(); if that decorator
    # persists return values, the secret would be written to its cache --
    # confirm that is acceptable.
    with open("OPENLIBRARY_API_KEY.txt", "r") as key_file:
        raw_key = key_file.read()
    return raw_key.strip()


In [None]:
# Cell 3: Getting API data
!pip install requests
import requests
import pandas as pd
import os

@persistio()
def get_book_details(title):
    """Search Open Library by title and return a few fields of the first hit.

    Args:
        title: Book title to search for.

    Returns:
        dict: with keys
            ``isbn`` -- first ISBN listed for the book, or ``'N/A'``;
            ``page_count`` -- ``number_of_pages_median``, or ``0``;
            ``first_publish`` -- ``first_publish_year``, or ``None``.
        The same placeholder values are returned when the response status is
        not 200 or the search yields no documents.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    not_found = {'isbn': 'N/A', 'page_count': 0, 'first_publish': None}

    # Pass the query via ``params`` so requests URL-encodes the title; building
    # the URL by hand breaks on titles containing '&', '#', '+' and similar.
    # The timeout keeps one stalled lookup from hanging the whole notebook.
    response = requests.get(
        "https://openlibrary.org/search.json",
        params={'title': title, 'apikey': get_openlibrary_api_key()},
        timeout=10,
    )
    if response.status_code != 200:
        return not_found

    # Tolerate a payload without 'docs' as well as an empty result list.
    docs = response.json().get('docs') or []
    if not docs:
        return not_found

    book = docs[0]
    # 'isbn' may be absent or an empty list; both fall back to the placeholder
    # instead of raising IndexError.
    isbns = book.get('isbn') or ['N/A']
    return {
        'isbn': isbns[0],
        'page_count': book.get('number_of_pages_median', 0),
        'first_publish': book.get('first_publish_year', None)
    }

# One lookup per title yields a Series of dicts; expanding that list of dicts
# gives one column per key (isbn, page_count, first_publish).
api_data = books_csv['title'].apply(get_book_details)
api_df = pd.DataFrame(list(api_data))
api_df


In [None]:
# Cell 4: Create and track dictionary data
# Hand-entered genre, rating and price for three titles, keyed by title.
book_categories = {
    "The Great Gatsby": {
        "genre": "Fiction",
        "rating": 4.5,
        "price": 12.99,
    },
    "1984": {
        "genre": "Dystopian",
        "rating": 4.8,
        "price": 15.99,
    },
    "To Kill a Mockingbird": {
        "genre": "Literary Fiction",
        "rating": 4.7,
        "price": 14.99,
    },
}

# Outer keys become the index; name that index 'title' and move it into a
# regular column so the frame can later be merged on 'title'.
category_df = (
    pd.DataFrame.from_dict(book_categories, orient='index')
    .rename_axis('title')
    .reset_index()
)
category_df


In [None]:
# Cell 5: Combine datasets and perform transformations
from datetime import datetime

# Put the CSV rows and the API lookups side by side. concat(axis=1) aligns on
# the row index -- assumes books_csv still has the default 0..n-1 index that
# api_df was built with; TODO confirm if books.csv is ever loaded differently.
combined_df = pd.concat([books_csv, api_df], axis=1)
# Left merge keeps every book; titles missing from category_df get NaN for
# the columns it contributes.
final_df = pd.merge(combined_df, category_df, on='title', how='left')

# NOTE(review): book_age depends on the year the notebook is executed, so
# re-running in a later year gives different values.
current_year = datetime.now().year
# assumes books.csv provides a numeric 'publication_year' column -- TODO confirm
final_df['book_age'] = current_year - final_df['publication_year']
# get_book_details uses page_count == 0 as its "unknown" placeholder. Mask
# non-positive counts to NaN so price_per_page is missing for those rows
# instead of silently equalling the full price (which dividing by 1 produced).
final_df['price_per_page'] = (
    final_df['price'] / final_df['page_count'].where(final_df['page_count'] > 0)
)

def rating_category(rating):
    """Bucket a numeric rating into a coarse label.

    Args:
        rating: Numeric rating, or a missing value (``None`` / NaN).

    Returns:
        ``'Excellent'`` for ratings >= 4.5, ``'Good'`` for ratings >= 4.0,
        ``'Average'`` for anything lower, and ``None`` when the rating is
        missing.
    """
    # A NaN rating fails every comparison and would otherwise fall through to
    # 'Average', mislabelling books that simply have no rating.
    if pd.isna(rating):
        return None
    if rating >= 4.5:
        return 'Excellent'
    if rating >= 4.0:
        return 'Good'
    return 'Average'

# Label every row by passing its rating through rating_category element-wise.
final_df['rating_category'] = final_df['rating'].map(rating_category)


In [None]:
# Cell 6: Visualize results
# Per-genre averages of rating, price and page count, rounded to 2 decimals.
genre_summary = (
    final_df
    .groupby('genre')[['rating', 'price', 'page_count']]
    .mean()
    .round(2)
)
!pip install matplotlib
import matplotlib.pyplot as plt

# Create the figure and axes explicitly and hand the axes to pandas.
# DataFrame.plot opens its own new figure when no `ax` is given, which left
# the figure made by plt.figure(figsize=(10, 6)) empty and the requested size
# unused.
fig, ax = plt.subplots(figsize=(10, 6))
# Marker area scales with the rating (rating * 100).
final_df.plot(
    kind='scatter',
    x='book_age',
    y='price',
    s=final_df['rating']*100,
    alpha=0.5,
    ax=ax,
)
ax.set_title('Book Age vs Price (size = rating)')
ax.set_xlabel('Book Age (years)')
ax.set_ylabel('Price ($)')
# Save before show() so the written file contains the rendered plot.
fig.savefig("scatter_plot.png")
plt.show()


In [None]:
from reprolab.environment import freeze_venv_dependencies
# NOTE(review): presumably records the packages installed in the '.my_venv'
# virtual environment so the run can be reproduced -- confirm against the
# reprolab documentation.
freeze_venv_dependencies('.my_venv')


In [None]:
# Close the experiment that start_experiment() opened in the first cell.
# NOTE(review): what gets recorded at this point is defined by reprolab, not
# visible in this notebook -- confirm against the reprolab documentation.
end_experiment()


In [None]:
from reprolab.experiment import list_and_sort_git_tags
list_and_sort_git_tags()
# Pick one of the listed git tags; the next cell uses it to download the
# reproducibility package.

In [None]:
from reprolab.experiment import download_reproducability_package
# 'v1.12.0' is a hard-coded tag; replace it with the tag you picked from the
# list_and_sort_git_tags() output.
# NOTE(review): presumably fetches the reproducibility package associated with
# that tag -- confirm against the reprolab documentation.
download_reproducability_package('v1.12.0')