In [None]:
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

### Explanation: Library Imports
- `pandas`: For data manipulation and analysis.
- `matplotlib.pyplot`: For creating data visualizations.
- `seaborn`: For advanced, aesthetically pleasing statistical plots.
- `numpy`: For numerical computations and array manipulations.

In [None]:
# Load the restaurant dataset; the first CSV column becomes the DataFrame index.
df = pd.read_csv("restaurants_definitivo.csv", index_col = 0)
# Preview the first rows.
df.head()

### Explanation: Loading the Dataset
- The dataset `restaurants_definitivo.csv` is loaded into a pandas DataFrame.
- `index_col=0` sets the first column as the index of the DataFrame.
- `df.head()` displays the first five rows for a quick overview.

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the dataset.
df.describe()

### Explanation: Descriptive Statistics
- `df.describe()` provides summary statistics for numerical columns:
  - **Count**: Number of non-null values.
  - **Mean**: Average value.
  - **Std**: Standard deviation (spread of the data).
  - **Min/Max**: Minimum and maximum values.
  - **25%, 50%, 75%**: Quartile values, showing data distribution.

In [None]:
df[['Number of Ratings', 'Restaurant Rating']].isna().sum() # Count the NaN values in each of the two columns

In [None]:
df[df[['Number of Ratings', 'Restaurant Rating']].isna().any(axis=1)].head() # Preview rows where either column is NaN

In [None]:
df_clean = df[['Number of Ratings', 'Restaurant Rating']].dropna() # Keep only these two columns and drop rows with a NaN in either

In [None]:
# Pairwise correlation between 'Number of Ratings' and 'Restaurant Rating'.
df_clean.corr()

In [None]:
# Collapse every "no schedule" marker (NaN, placeholder dicts, empty string) into 'Closed'.
no_timetable_markers = ["{'Horario': 'No hay horario'}", "{'Horario': 'No Timetable'}", ""]
df['Timetable'] = (
    df['Timetable']
    .fillna('Closed')
    .replace(no_timetable_markers, 'Closed')
)

In [None]:
from datetime import datetime, timedelta

def calculate_total_hours(timetable):
    """Compute total opening hours and number of open days from a timetable string.

    Parameters
    ----------
    timetable : str
        Either the literal ``"Closed"`` or the text of a Python dict literal
        mapping each day to ``'Closed'`` or to one or more ``"HH:MM - HH:MM"``
        shifts separated by ``", "``.

    Returns
    -------
    tuple
        ``(total_hours, days_open)``.
        ``(None, 0)`` when ``timetable == "Closed"``.
        ``(0, 0)`` when the input is not a string or cannot be processed
        (a diagnostic message is printed in both cases).
    """
    try:
        # Check that the input is a string
        if not isinstance(timetable, str):
            print(f"Invalid entry: {timetable} (not a string)")
            return 0, 0

        if timetable == "Closed":
            return None, 0

        # The text comes from a data file, so it must never be executed:
        # ast.literal_eval only accepts Python literals (unlike eval, which
        # would run arbitrary expressions embedded in the CSV).
        timetable_dict = ast.literal_eval(timetable)
        total_hours = 0
        days_open = 0

        for day, hours in timetable_dict.items():
            if hours == 'Closed':
                continue

            # Any day that is not 'Closed' counts as open, even if some of its
            # shifts turn out to be malformed and are skipped below.
            days_open += 1
            daily_hours = 0

            # Some days contain multiple shifts
            for shift in hours.split(", "):
                if ' - ' not in shift:
                    print(f"Invalid format for {day}: {shift}")
                    continue  # Skip invalid formats

                opening, closing = shift.split(" - ")
                opening_time = datetime.strptime(opening, '%H:%M')
                closing_time = datetime.strptime(closing, '%H:%M')

                # Handle overnight shifts: closing before opening means the
                # shift ends on the following day.
                # NOTE(review): identical opening and closing times yield 0
                # hours; confirm whether that should mean "open 24 hours".
                if closing_time < opening_time:
                    closing_time += timedelta(days=1)

                daily_hours += (closing_time - opening_time).total_seconds() / 3600

            total_hours += daily_hours

        return total_hours, days_open
    except Exception as e:
        # Best-effort: one malformed timetable must not abort the whole apply().
        print(f"Error processing timetable: {timetable} - {e}")
        return 0, 0

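### Explanation: Computing Opening Hours
- `calculate_total_hours` converts a timetable string into a dictionary that maps each day to its opening hours.
- Days marked `Closed` are skipped; every other day counts as one open day.
- A day may contain several shifts separated by `, `; each shift has the form `HH:MM - HH:MM`.
- A shift whose closing time is earlier than its opening time is treated as ending on the next day.
- The function returns the total hours and the number of days open. A `Closed` timetable returns `(None, 0)`; invalid input returns `(0, 0)`.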

In [None]:
# Run calculate_total_hours on every timetable; each call returns a
# (total_hours, days_open) tuple, which .apply(pd.Series) expands into two columns.
df[['Total Work Hours', "Days Open"]] = df['Timetable'].apply(calculate_total_hours).apply(pd.Series)
# Preview the two new columns.
df[['Total Work Hours', 'Days Open']].head()

In [None]:
# Rows with no total work hours (calculate_total_hours returns None for 'Closed' timetables).
nan_rows = df[df["Total Work Hours"].isna()]
nan_rows

In [None]:
# Number of missing values in 'Number of Bookmarks'.
df[["Number of Bookmarks"]].isna().sum()

In [None]:
# Number of missing values in 'Number of Ratings'.
df[["Number of Ratings"]].isna().sum()

In [None]:
# Number of restaurants whose 'Number of Ratings' is exactly 0.
(df["Number of Ratings"] == 0).sum()

In [None]:
# Number of restaurants whose 'Number of Bookmarks' is exactly 0.
(df["Number of Bookmarks"] == 0).sum()

In [None]:
# Drop restaurants lacking bookmark or rating counts, then treat a missing
# total of work hours as 0.
df_clean = (
    df
    .dropna(subset=["Number of Bookmarks", "Number of Ratings"])
    .fillna({"Total Work Hours": 0})
)
# Remaining NaN count per column.
df_clean.isna().sum()

In [None]:
x = df_clean["Number of Bookmarks"]
y = df_clean["Number of Ratings"]

plt.figure(figsize=(10, 6))
plt.scatter(x, y, alpha=0.6, edgecolors='w', label="Data Points")

# Degree-1 least-squares fit: m is the slope, b the intercept.
m, b = np.polyfit(x, y, 1)
print(m)
print(b)

plt.plot(x, m * x + b, color='red', label="Best Fit Line")
plt.title("Number of Bookmarks vs Number of Ratings", fontsize=14)
plt.xlabel("Number of Bookmarks", fontsize=12)
plt.ylabel("Number of Ratings", fontsize=12)
plt.grid(True)
plt.legend()
plt.savefig('ratings_bookmarks_corr.png', bbox_inches='tight')
plt.show()

In [None]:
# Display the cleaned DataFrame.
df_clean

In [None]:
# Total work hours spread over the 7 days of the week, rounded to 2 decimals.
df_clean["Average Daily Hours"] = df_clean["Total Work Hours"].div(7).round(2)
df_clean

In [None]:
# Inspect the raw link columns before they are converted to 0/1 flags.
df_clean[["Website", "Instagram", "Facebook"]]

In [None]:
def map_links(value):
    """Encode a link cell as a presence flag.

    Parameters
    ----------
    value : str or missing
        Cell content: a link, a placeholder starting with ``"No"``, or a
        missing value (``None`` / ``NaN``).

    Returns
    -------
    int
        ``0`` when the value is missing, blank, or starts with ``"No"``;
        ``1`` otherwise.
    """
    # Missing cells have no .startswith(); treat them as "no link" instead of
    # raising AttributeError in the middle of an apply().
    if pd.isna(value):
        return 0
    # A blank string carries no link either.
    if not value.strip() or value.startswith("No"):
        return 0
    return 1


In [None]:
# NOTE: this binds `df` to the same object as `df_clean` (no copy is made), so
# column assignments made through `df` are also visible through `df_clean`.
df = df_clean

In [None]:
# Turn each link column into a 0/1 presence flag.
for link_column in ("Website", "Instagram", "Facebook"):
    df[link_column] = df[link_column].apply(map_links)
df

In [None]:
# Number of link flags equal to 1 per restaurant. The vectorized eq/sum
# replaces a Python-level lambda that was called once per row.
df["Total Social Media"] = df[["Website", "Instagram", "Facebook"]].eq(1).sum(axis=1)

In [None]:
# 1 when at least one link flag equals 1, else 0. The vectorized eq/any
# replaces a Python-level lambda that was called once per row.
df["Has Social Media"] = df[['Website', 'Instagram', 'Facebook']].eq(1).any(axis=1).astype("int64")

In [None]:
# Ordinal encoding of the price labels; 0 stands for "No price range".
# NOTE(review): replace() is applied to the whole DataFrame, so a matching
# label in any other column is converted too -- confirm this is intended.
transform_prices = {"No price range": 0, "Barato": 1, "Moderado": 2, "Caro": 3}
df = df.replace(to_replace=transform_prices)

In [None]:
# Display the DataFrame after encoding the link and price columns.
df

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale the inputs of the popularity score.
# df_norm must carry df's own index: df keeps the index read from the CSV and
# has had rows dropped, so a default 0..n-1 index would pair scores with the
# wrong restaurants (and leave NaN) when assigned to df['Popularity'] below.
scaler = MinMaxScaler()
df_norm = pd.DataFrame(
    scaler.fit_transform(
        df[['Number of Ratings', 'Number of Bookmarks', 'Restaurant Rating', 'Total Social Media', 'Price Range']]),
    columns=['Normalized Ratings', 'Normalized Bookmarks', 'Normalized Rating', 'Normalized Social Media', 'Normalized Price'],
    index=df.index,
)

# Weighted sum of the normalized features; the weights add up to 1.0.
# NOTE(review): only bookmark/rating counts were NaN-filtered above, so a
# missing 'Restaurant Rating' presumably yields a NaN popularity -- confirm.
df['Popularity'] = (
    df_norm['Normalized Ratings'] * 0.4 +
    df_norm['Normalized Rating'] * 0.20 +
    df_norm['Normalized Bookmarks'] * 0.2 +
    df_norm['Normalized Social Media'] * 0.1 +
    df_norm['Normalized Price'] * 0.1
)
df_norm

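### Explanation: Popularity Metric
- `MinMaxScaler` rescales the five input columns (`Number of Ratings`, `Number of Bookmarks`, `Restaurant Rating`, `Total Social Media`, `Price Range`) to the [0, 1] range.
- `Popularity` is a weighted sum of the normalized values: number of ratings (0.4), restaurant rating (0.2), number of bookmarks (0.2), social media presence (0.1), and price range (0.1).
- `df_norm` holds the normalized columns and is displayed for inspection.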

In [None]:
# Histogram of the popularity score, saved to popularity.png.
df['Popularity'].hist(
    bins=20,
    figsize=(8, 5),
    alpha=0.7,
    color='blue',
)
plt.xlabel('Popularity')
plt.ylabel('Frequency')
plt.title('Distribution of Popularity Metric')
plt.savefig('popularity.png', bbox_inches='tight')
plt.show()

In [None]:
# Numeric columns only, leaving out the identifier and coordinate columns.
numeric_cols = df.select_dtypes(include=["number"]).drop(
    columns=["id", "Lat", "Long"], errors="ignore"
)
correlation_matrix = numeric_cols.corr()

# Annotated heatmap of the pairwise correlations, saved to matriz_correlaciones.png.
plt.figure(figsize=(10, 8))
ax = sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    linewidths=0.5,
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=35, horizontalalignment='right')
plt.title('Correlation Matrix for Numeric Columns')
plt.savefig('matriz_correlaciones.png', bbox_inches='tight')
plt.show()

In [None]:
# Scatter of total work hours against the popularity score.
plt.figure(figsize=(10, 6))
plt.scatter(
    df['Total Work Hours'],
    df['Popularity'],
    alpha=0.6,
)
plt.xlabel('Total Work Hours')
plt.ylabel('Popularity')
plt.title('Work Hours vs Popularity')
plt.grid(True)
plt.show()

In [None]:
# Load df_tipos.csv; the first CSV column becomes the DataFrame index.
df_types = pd.read_csv("df_tipos.csv", index_col=0)
df_types

### Explanation: Loading the Restaurant Types Dataset
- The dataset `df_tipos.csv` is loaded into a pandas DataFrame named `df_types`.
- `index_col=0` sets the first column as the index of the DataFrame.
- Evaluating `df_types` on the last line of the cell displays the DataFrame.