In [41]:
import os
import pandas as pd
import re
from IPython.display import display

# ---------------------------
# Utility: Check if string looks like a link
# ---------------------------


def is_link(value: str) -> bool:
    """Return True when *value* is text beginning with http:// or https://.

    Anything ``pd.isna`` reports as missing is never a link. Other values are
    converted with ``str`` and stripped of surrounding whitespace before the
    (case-sensitive) prefix check.
    """
    if pd.isna(value):
        return False
    text = str(value).strip()
    return text.startswith(("http://", "https://"))


# ---------------------------
# Validate output folders (basic checks: file count, empty)
# ---------------------------
def validate_output(output_base: str):
    """Report, per sub-folder of *output_base*, how many CSV files it holds.

    A folder is "GOOD" only when it contains exactly one CSV and that CSV is
    not a zero-byte file; otherwise it is "BAD" and the reason is in "Notes".
    Entries directly under *output_base* that are not directories are ignored.

    Folders and file names are processed in sorted order so the report is
    reproducible (``os.listdir`` order is arbitrary), and the returned frame
    always carries the same columns, even when no sub-folder exists.

    Args:
        output_base: Directory whose immediate sub-folders are inspected.

    Returns:
        DataFrame with columns Folder, CSV_Files, CSV_Count, Status, Notes,
        one row per sub-folder. The same frame is also shown via ``display``.

    Raises:
        OSError: If *output_base* (or a sub-folder) cannot be listed.
    """
    columns = ["Folder", "CSV_Files", "CSV_Count", "Status", "Notes"]
    report_data = []

    for folder_name in sorted(os.listdir(output_base)):
        folder_path = os.path.join(output_base, folder_name)
        if not os.path.isdir(folder_path):
            continue

        # Collect CSVs in this folder (extension match is case-insensitive)
        csv_files = sorted(
            f for f in os.listdir(folder_path) if f.lower().endswith(".csv"))
        csv_count = len(csv_files)

        # Exactly one CSV is the only acceptable layout
        status = "GOOD" if csv_count == 1 else "BAD"
        notes = []
        if csv_count == 0:
            notes.append("No CSV found")
        elif csv_count > 1:
            notes.append(f"Multiple CSVs found ({csv_count})")

        # A zero-byte CSV makes the folder BAD even if it is the only file
        for csv_file in csv_files:
            if os.path.getsize(os.path.join(folder_path, csv_file)) == 0:
                status = "BAD"
                notes.append(f"Empty file: {csv_file}")

        report_data.append({
            "Folder": folder_name,
            "CSV_Files": ", ".join(csv_files) if csv_files else "None",
            "CSV_Count": csv_count,
            "Status": status,
            "Notes": " | ".join(notes),
        })

    # Explicit columns keep the schema stable when report_data is empty
    df_report = pd.DataFrame(report_data, columns=columns)
    print("📑 Folder Validation Report")
    display(df_report)
    return df_report


# ---------------------------
# Validate CSV contents (links + empty check in first column)
# ---------------------------
def validate_csv_contents(output_base: str):
    """Inspect every CSV one level below *output_base* and list its problems.

    For each CSV the header row is recorded, then the first column is checked
    for missing values and for values that ``is_link`` rejects. A file that
    cannot be read or checked is reported as an issue instead of raising.

    Args:
        output_base: Directory whose immediate sub-folders contain the CSVs.

    Returns:
        Tuple ``(df_issues, df_headers)``: one row per detected issue
        (Folder, CSV_File, Issue) and one row per readable CSV
        (Folder, CSV_File, Headers). Both are also shown via ``display``.
    """
    issue_rows = []
    header_rows = []

    def record_issue(folder, filename, message):
        # Single place that defines the shape of an issue row.
        issue_rows.append(
            {"Folder": folder, "CSV_File": filename, "Issue": message})

    for entry in os.listdir(output_base):
        entry_path = os.path.join(output_base, entry)
        if not os.path.isdir(entry_path):
            continue

        for filename in os.listdir(entry_path):
            if not filename.lower().endswith(".csv"):
                continue
            file_path = os.path.join(entry_path, filename)

            try:
                try:
                    frame = pd.read_csv(file_path, encoding="utf-8")
                except UnicodeDecodeError:
                    # Not valid UTF-8: retry with a single-byte encoding.
                    frame = pd.read_csv(file_path, encoding="latin1")

                header_rows.append({
                    "Folder": entry,
                    "CSV_File": filename,
                    "Headers": list(frame.columns),
                })

                # Nothing to check when the file has no columns at all.
                if frame.shape[1] == 0:
                    continue

                links = frame.iloc[:, 0]

                # Missing values in the link column
                missing = links.isna().sum()
                if missing > 0:
                    record_issue(
                        entry, filename,
                        f"{missing} empty values in first column")

                # Present values that do not look like links
                rejected = sum(1 for v in links.dropna() if not is_link(v))
                if rejected:
                    record_issue(
                        entry, filename,
                        f"{rejected} non-link values in first column")

            except Exception as exc:
                # Best effort: one unreadable file must not stop the scan.
                record_issue(entry, filename, f"Failed to read CSV - {exc}")

    df_issues = pd.DataFrame(issue_rows)
    df_headers = pd.DataFrame(header_rows)

    print("⚠️ Content Validation Issues")
    if df_issues.empty:
        display(pd.DataFrame([{"Status": "No issues found ✅"}]))
    else:
        display(df_issues)

    print("\n📊 CSV Headers Overview")
    display(df_headers)

    return df_issues, df_headers


# ---------------------------
# Run full validation
# ---------------------------
# Root folder that is scanned; both validators look at its immediate
# sub-folders only.
OUTPUT_DIR = "Downloaded_Universities"

# Step 1: Validate folder/file structure
# (one report row per sub-folder: CSV count, GOOD/BAD status, notes)
df_report = validate_output(OUTPUT_DIR)

# Step 2: Validate CSV contents
# (df_issues: first-column problems / unreadable files; df_headers: the
# header list of every CSV that could be read)
df_issues, df_headers = validate_csv_contents(OUTPUT_DIR)

📑 Folder Validation Report


Unnamed: 0,Folder,CSV_Files,CSV_Count,Status,Notes
0,1-aZqxTN1RpLwmfwxeGCMvmDiZo5mpKsS,Okanagan College Courses.csv,1,GOOD,
1,1-pYDlcnnI_ZsHLcBnRMNjrCZZIW3HAk4,University of Rhode Island Courses.csv,1,GOOD,
2,1-xfPxe6bhebWSWOJX22RVyPqNqveQZDq,Craleton University Courses.csv,1,GOOD,
3,1-Y5aXdseCkhY_AiqhIFmQcSl4xGbWOGV,University of Bologna Courses.csv,1,GOOD,
4,10IZEzyvflhrSY3BU-6cGpvz7QWdMVR9O,Free University of Berlin Courses.csv,1,GOOD,
...,...,...,...,...,...
347,University of Galway,University of Galway.csv,1,GOOD,
348,University of Lethbridge,University of Lethbridge.csv,1,GOOD,
349,University of Limerick,University of Limerick.csv,1,GOOD,
350,University of New Burnswick,University of New Burnswick.csv,1,GOOD,


⚠️ Content Validation Issues


Unnamed: 0,Status
0,No issues found ✅



📊 CSV Headers Overview


Unnamed: 0,Folder,CSV_File,Headers
0,1-aZqxTN1RpLwmfwxeGCMvmDiZo5mpKsS,Okanagan College Courses.csv,"[Course Link, Course Title, Required IELTS Sco..."
1,1-pYDlcnnI_ZsHLcBnRMNjrCZZIW3HAk4,University of Rhode Island Courses.csv,"[Course Link, Course Title, Required IELTS Sco..."
2,1-xfPxe6bhebWSWOJX22RVyPqNqveQZDq,Craleton University Courses.csv,"[Course Link, Course Title, Required IELTS Sco..."
3,1-Y5aXdseCkhY_AiqhIFmQcSl4xGbWOGV,University of Bologna Courses.csv,"[Course Link, Course Title, Required IELTS Sco..."
4,10IZEzyvflhrSY3BU-6cGpvz7QWdMVR9O,Free University of Berlin Courses.csv,"[Course Link, Course Title, Required IELTS Sco..."
...,...,...,...
347,University of Galway,University of Galway.csv,"[Course Link, Course Title, Required IELTS Sco..."
348,University of Lethbridge,University of Lethbridge.csv,"[Course Link, Course Title, Required IELTS Sco..."
349,University of Limerick,University of Limerick.csv,"[Course Link, Course Title, Required IELTS Sco..."
350,University of New Burnswick,University of New Burnswick.csv,"[Course Link, Course Title, Required IELTS Sco..."


In [1]:
import os
import re
import gspread
import pandas as pd
from gspread_dataframe import get_as_dataframe

# ---------------------------
# Configurations
# ---------------------------
# Path handed to gspread.service_account() as the credentials file.
SERVICE_ACCOUNT_FILE = 'service_account.json'
# Key of the spreadsheet opened with open_by_key().
SHEET_ID = "1eYz8Nvr3BToRrmReXNLR8zQrk4X8tsdKZO_Fj9mNThc"
# Worksheet (tab) name inside that spreadsheet.
SHEET_NAME = "Updating"
# Sheet column that is searched for each folder name further down.
CSV_COLUMN = "CSV"
# Sheet column that supplies the new file name.
UNIVERSITY_COLUMN = "University Name"

# Local folder whose sub-folders hold the downloaded CSVs.
OUTPUT_DIR = "Downloaded_Universities"

# ---------------------------
# Authenticate Google Sheet
# ---------------------------
gc = gspread.service_account(filename=SERVICE_ACCOUNT_FILE)
sh = gc.open_by_key(SHEET_ID)
ws = sh.worksheet(SHEET_NAME)
# Rows missing either the CSV cell or the university name cannot be matched
# or used for renaming, so they are dropped right after loading.
df = get_as_dataframe(ws, evaluate_formulas=True, header=0).dropna(
    subset=[CSV_COLUMN, UNIVERSITY_COLUMN])

# ---------------------------
# Utility: Safe filename
# ---------------------------


def safe_filename(name: str) -> str:
    """Return *name* with file-name-hostile characters removed.

    Dots, apostrophes, angle brackets, colons, double quotes, slashes,
    backslashes, pipes, question marks and asterisks are deleted; the result
    is then stripped of leading/trailing whitespace.
    """
    forbidden = re.compile(r'[.\'<>:"/\\|?*]')
    without_forbidden = forbidden.sub('', name)
    return without_forbidden.strip()


# ---------------------------
# Process each downloaded folder
# ---------------------------
# For every sub-folder of OUTPUT_DIR: find the sheet row whose CSV cell
# mentions the folder name and rename the folder's CSV after that row's
# university name.
for folder_name in os.listdir(OUTPUT_DIR):
    folder_path = os.path.join(OUTPUT_DIR, folder_name)
    if not os.path.isdir(folder_path):
        continue

    # Find rows where CSV column contains the folder_name (partial match).
    # regex=False makes this a literal substring test: by default the folder
    # name would be interpreted as a regular expression, so characters such
    # as "(", "+", "." or "[" would mis-match or raise.
    mask = df[CSV_COLUMN].astype(str).str.contains(
        folder_name, na=False, case=False, regex=False)
    matches = df[mask]

    if matches.empty:
        print(f"⚠️ No match found in sheet for folder: {folder_name}")
        continue

    # Take the first match (or could loop if multiple)
    uni_name = str(matches.iloc[0][UNIVERSITY_COLUMN]).strip()
    if not uni_name:
        print(f"⚠️ University Name empty for folder: {folder_name}")
        continue

    new_file_name = safe_filename(uni_name) + ".csv"

    # Find CSV file inside folder (assume only 1 relevant CSV); sorting makes
    # the pick deterministic when that assumption does not hold.
    csv_files = sorted(
        f for f in os.listdir(folder_path) if f.lower().endswith(".csv"))
    if not csv_files:
        print(f"⚠️ No CSV file found in folder: {folder_name}")
        continue

    old_csv_path = os.path.join(folder_path, csv_files[0])
    new_csv_path = os.path.join(folder_path, new_file_name)

    # Never clobber a different file that already has the target name:
    # os.rename silently replaces it on POSIX and raises on Windows.
    # samefile() lets a re-run (or a case-only rename) through.
    if os.path.exists(new_csv_path) and not os.path.samefile(
            old_csv_path, new_csv_path):
        print(
            f"⚠️ Target {new_file_name} already exists in folder "
            f"{folder_name}; skipped {csv_files[0]}")
        continue

    # Rename
    os.rename(old_csv_path, new_csv_path)
    print(
        f"✅ Renamed {csv_files[0]} → {new_file_name} in folder {folder_name}")

print("\n🎉 All renaming completed!")

✅ Renamed Okanagan College Courses.csv → Okanagan College.csv in folder 1-aZqxTN1RpLwmfwxeGCMvmDiZo5mpKsS
✅ Renamed University of Rhode Island Courses.csv → University of Rhode Island.csv in folder 1-pYDlcnnI_ZsHLcBnRMNjrCZZIW3HAk4
✅ Renamed Craleton University Courses.csv → Craleton University.csv in folder 1-xfPxe6bhebWSWOJX22RVyPqNqveQZDq
✅ Renamed University of Bologna Courses.csv → University of Bologna.csv in folder 1-Y5aXdseCkhY_AiqhIFmQcSl4xGbWOGV
✅ Renamed Free University of Berlin Courses.csv → Free University of Berlin.csv in folder 10IZEzyvflhrSY3BU-6cGpvz7QWdMVR9O
✅ Renamed University of Sydney Courses.csv → University of Sydney.csv in folder 10vEuC8Wbt4Y6aF1phbFAwnuIjLSgXmof
✅ Renamed Trent University Courses.csv → Trent University.csv in folder 10Xcwi6Qa5dYE4ACsEzAkbFEUvQZBPJ8d
✅ Renamed Liverpool John Moores University Courses.csv → Liverpool John Moores University.csv in folder 11-78843GuAEaGclvTN47VfOMk_4iseRX
✅ Renamed CQ University Courses.csv → CQ University.csv in

In [2]:
import os
import shutil

OUTPUT_DIR = "Downloaded_Universities"

# ---------------------------
# Move every CSV found one level down into OUTPUT_DIR itself, then delete
# the sub-folder when nothing is left in it.
# ---------------------------
for folder_name in os.listdir(OUTPUT_DIR):
    folder_path = os.path.join(OUTPUT_DIR, folder_name)
    if not os.path.isdir(folder_path):
        continue

    csv_names = [
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(".csv")
    ]
    for file_name in csv_names:
        src_path = os.path.join(folder_path, file_name)
        dst_path = os.path.join(OUTPUT_DIR, file_name)

        # Name already taken in the root: try <base>_1, <base>_2, ... until
        # an unused name is found.
        if os.path.exists(dst_path):
            base, ext = os.path.splitext(file_name)
            counter = 1
            dst_path = os.path.join(OUTPUT_DIR, f"{base}_{counter}{ext}")
            while os.path.exists(dst_path):
                counter += 1
                dst_path = os.path.join(OUTPUT_DIR, f"{base}_{counter}{ext}")

        shutil.move(src_path, dst_path)
        print(f"✅ Moved {file_name} → {os.path.basename(dst_path)}")

    # rmdir only succeeds on an empty directory, so leftovers (non-CSV
    # files) keep the folder in place.
    try:
        os.rmdir(folder_path)
        print(f"🗑️ Removed empty folder: {folder_name}")
    except OSError:
        print(f"⚠️ Could not remove {folder_name}, not empty.")

print("\n🎉 All CSVs have been moved to root output folder!")

✅ Moved Okanagan College.csv → Okanagan College.csv
🗑️ Removed empty folder: 1-aZqxTN1RpLwmfwxeGCMvmDiZo5mpKsS
✅ Moved University of Rhode Island.csv → University of Rhode Island.csv
🗑️ Removed empty folder: 1-pYDlcnnI_ZsHLcBnRMNjrCZZIW3HAk4
✅ Moved Craleton University.csv → Craleton University.csv
🗑️ Removed empty folder: 1-xfPxe6bhebWSWOJX22RVyPqNqveQZDq
✅ Moved University of Bologna.csv → University of Bologna.csv
🗑️ Removed empty folder: 1-Y5aXdseCkhY_AiqhIFmQcSl4xGbWOGV
✅ Moved Free University of Berlin.csv → Free University of Berlin.csv
🗑️ Removed empty folder: 10IZEzyvflhrSY3BU-6cGpvz7QWdMVR9O
✅ Moved University of Sydney.csv → University of Sydney.csv
🗑️ Removed empty folder: 10vEuC8Wbt4Y6aF1phbFAwnuIjLSgXmof
✅ Moved Trent University.csv → Trent University.csv
🗑️ Removed empty folder: 10Xcwi6Qa5dYE4ACsEzAkbFEUvQZBPJ8d
✅ Moved Liverpool John Moores University.csv → Liverpool John Moores University.csv
🗑️ Removed empty folder: 11-78843GuAEaGclvTN47VfOMk_4iseRX
✅ Moved CQ Univers

In [10]:
import os
import re
import gspread
import pandas as pd
from gspread_dataframe import get_as_dataframe

# ---------------------------
# Configurations
# ---------------------------
# Credentials file passed to gspread.service_account().
SERVICE_ACCOUNT_FILE = "service_account.json"
# Key of the spreadsheet opened with open_by_key().
SHEET_ID = "1eYz8Nvr3BToRrmReXNLR8zQrk4X8tsdKZO_Fj9mNThc"
# Worksheet (tab) that lists the universities.
SHEET_NAME = "Updating"
# Folder that is listed for *.csv files (top level only).
OUTPUT_DIR = "Downloaded_Universities"

# ---------------------------
# Safe filename cleaner
# ---------------------------


def safe_filename(name: str) -> str:
    """Build the comparison key for a university or file name.

    Non-string input yields "". For strings, the forbidden file-system
    characters are deleted, the text is lowercased, every space character is
    removed and any remaining surrounding whitespace is stripped.
    """
    if isinstance(name, str):
        # remove forbidden filesystem characters
        without_forbidden = re.sub(r'[.\'<>:"/\\|?*]', '', name)
        # normalize case, then drop spaces so spacing differences do not matter
        lowered = without_forbidden.lower()
        return lowered.replace(" ", "").strip()
    return ""


# ---------------------------
# Load Google Sheet
# ---------------------------
# Open the worksheet and pull it into a DataFrame.
gc = gspread.service_account(filename=SERVICE_ACCOUNT_FILE)
sh = gc.open_by_key(SHEET_ID)
ws = sh.worksheet(SHEET_NAME)
df = get_as_dataframe(ws, evaluate_formulas=True, header=0)

# University names from sheet, keyed by their normalized form
# (when two names normalize to the same key, the later one wins).
sheet_unis = df["University Name"].dropna().tolist()
sheet_unis_clean = dict(zip(map(safe_filename, sheet_unis), sheet_unis))

# ---------------------------
# Downloaded CSVs, keyed by the normalized file name without extension
# ---------------------------
csv_files = list(
    filter(lambda n: n.lower().endswith(".csv"), os.listdir(OUTPUT_DIR)))
csv_names_clean = {safe_filename(os.path.splitext(f)[0]): f for f in csv_files}

# ---------------------------
# Sheet entries with no CSV of the same normalized name
# ---------------------------
missing = [
    original
    for key, original in sheet_unis_clean.items()
    if key not in csv_names_clean
]

# ---------------------------
# Output result
# ---------------------------
if not missing:
    print("✅ All universities have CSVs downloaded!")
else:
    print("⚠️ Missing CSVs for the following universities:")
    for uni in missing:
        print(" -", uni)

✅ All universities have CSVs downloaded!


In [11]:
# ---------------------------
# Find duplicate universities in sheet
# ---------------------------
from collections import defaultdict

# Group sheet rows by normalized university name.
uni_counts = defaultdict(list)

# Row numbers are derived from the DataFrame index rather than from the
# position in the NaN-filtered list: counting positions would shift every
# reported row by the number of blank "University Name" cells above it.
# Sheet row = index + 2 (header in row 1, first data row has index 0).
# NOTE(review): assumes `df` still has the default 0-based integer index it
# was loaded with — confirm if `df` is re-indexed elsewhere.
for idx, uni in df["University Name"].dropna().items():
    clean = safe_filename(uni)
    uni_counts[clean].append((idx + 2, uni))

duplicates = {k: v for k, v in uni_counts.items() if len(v) > 1}

if duplicates:
    print("⚠️ Duplicate universities found in sheet:")
    for clean, entries in duplicates.items():
        print(f" - {entries[0][1]} (appears {len(entries)} times)")
        for row_num, raw_name in entries:
            print(f"    Row {row_num}: {raw_name}")
else:
    print("✅ No duplicates found in sheet.")

✅ No duplicates found in sheet.


In [12]:
# ---------------------------
# Find extra CSVs (not in sheet)
# ---------------------------

# CSV files whose normalized name matches no university in the sheet.
extra_csvs = [
    csv_file
    for csv_clean, csv_file in csv_names_clean.items()
    if csv_clean not in sheet_unis_clean
]

# ---------------------------
# Output result
# ---------------------------
if not extra_csvs:
    print("✅ No extra CSV files. All match the sheet.")
else:
    print("⚠️ Extra CSV files found (not in sheet):")
    for f in extra_csvs:
        print(" -", f)

⚠️ Extra CSV files found (not in sheet):
 - British Columbia Institute of Technology_1.csv
 - Trent University_1.csv
 - Vancouver Island University_1.csv


In [13]:
import os
import re
import gspread
from gspread_dataframe import get_as_dataframe
from collections import defaultdict

# ---------------------------
# Config
# ---------------------------
# Credentials file passed to gspread.service_account().
SERVICE_ACCOUNT_FILE = "service_account.json"
# Key of the spreadsheet opened with open_by_key().
SHEET_ID = "1eYz8Nvr3BToRrmReXNLR8zQrk4X8tsdKZO_Fj9mNThc"
# Worksheet (tab) that lists the universities.
SHEET_NAME = "Updating"
# Folder that is listed for *.csv files (top level only).
OUTPUT_DIR = "Downloaded_Universities"
# Sheet column holding the university names that are compared to file names.
UNIVERSITY_COL = "University Name"

# ---------------------------
# Use the same cleaner for matching & filenames
# ---------------------------


def safe_filename(name: str) -> str:
    """Normalize a name into the key used for sheet/file matching.

    Returns "" for anything that is not a string. Otherwise deletes the
    forbidden file-system characters, lowercases, removes all space
    characters and strips remaining surrounding whitespace.
    """
    if not isinstance(name, str):
        return ""
    forbidden = re.compile(r'[.\'<>:"/\\|?*]')
    key = forbidden.sub('', name).lower()
    # Splitting on the space character and re-joining removes every space.
    return "".join(key.split(" ")).strip()


# ---------------------------
# Load sheet
# ---------------------------
# Open the worksheet and pull it into a DataFrame.
gc = gspread.service_account(filename=SERVICE_ACCOUNT_FILE)
ws = gc.open_by_key(SHEET_ID).worksheet(SHEET_NAME)
df = get_as_dataframe(ws, evaluate_formulas=True, header=0)

if UNIVERSITY_COL not in df.columns:
    raise SystemExit(f"❌ Column '{UNIVERSITY_COL}' not found in sheet.")

# Only rows that actually carry a university name take part in the comparison.
sheet_rows = df[UNIVERSITY_COL].dropna()
sheet_total_rows = len(sheet_rows)

# Map cleaned -> list of (row_number, original_text).
# The row number comes from the Series index, which dropna() preserves:
# enumerating the filtered values instead would under-report the row of
# every entry that sits below a blank cell. Sheet row = index + 2 (header in
# row 1, first data row has index 0).
# NOTE(review): assumes the frame keeps the default 0-based integer index
# it was loaded with — confirm.
sheet_map = defaultdict(list)
for pos, val in sheet_rows.items():
    sheet_map[safe_filename(str(val))].append((pos + 2, str(val).strip()))

# ---------------------------
# Load CSVs from OUTPUT_DIR
# ---------------------------
if not os.path.isdir(OUTPUT_DIR):
    raise SystemExit(f"❌ OUTPUT_DIR not found: {OUTPUT_DIR}")

# Top-level *.csv files only (extension match is case-insensitive).
csv_files = list(
    filter(lambda n: n.lower().endswith(".csv"), os.listdir(OUTPUT_DIR)))
csv_total_files = len(csv_files)

# Map cleaned -> list of filenames; a list longer than one means several
# files collapse onto the same normalized name.
csv_map = defaultdict(list)
for f in csv_files:
    base, ext = os.path.splitext(f)
    csv_map[safe_filename(base)].append(f)

# ---------------------------
# Compare the two key sets
# ---------------------------
sheet_keys = set(sheet_map)
csv_keys = set(csv_map)

matched_keys = sheet_keys.intersection(csv_keys)
missing_keys = sheet_keys.difference(csv_keys)   # in sheet, not in files
extra_keys = csv_keys.difference(sheet_keys)     # in files, not in sheet

# Normalized keys that occur more than once on either side
sheet_dupes = {
    key: rows for key, rows in sheet_map.items() if len(rows) >= 2}
csv_dupes = {
    key: names for key, names in csv_map.items() if len(names) >= 2}

# ---------------------------
# Report
# ---------------------------
missing_count = len(missing_keys)
extra_count = len(extra_keys)

# Headline numbers, one line each.
for summary_line in (
    "\n==== SUMMARY ====",
    f"Sheet rows (non-empty '{UNIVERSITY_COL}'): {sheet_total_rows}",
    f"Sheet unique (after cleaning):          {len(sheet_keys)}",
    f"CSV files in '{OUTPUT_DIR}':             {csv_total_files}",
    f"CSV unique (after cleaning):             {len(csv_keys)}",
    f"Matched (by cleaned name):               {len(matched_keys)}",
):
    print(summary_line)

# Sheet entries without a file, listed with their sheet row.
if missing_count:
    print(
        f"\n❌ Missing CSVs for {missing_count} sheet entr{'y' if missing_count == 1 else 'ies'}:")
    for k in sorted(missing_keys):
        for row_num, raw in sheet_map[k]:
            print(f"  - Row {row_num}: {raw}")

# Files without a sheet entry.
if extra_count:
    print(f"\n⚠️ Extra CSV files not present in sheet ({extra_count}):")
    for k in sorted(extra_keys):
        for f in csv_map[k]:
            print(f"  - {f}")

# Sheet rows that normalize to the same key.
if sheet_dupes:
    print(
        f"\n🔁 Duplicates in sheet ({len(sheet_dupes)} normalized keys have >1 rows):")
    for entries in sheet_dupes.values():
        print("  - " + ", ".join(f"Row {r}: {t}" for r, t in entries))

# Files that normalize to the same key.
if csv_dupes:
    print(
        f"\n🔁 Duplicates in CSV filenames ({len(csv_dupes)} normalized keys map to >1 files):")
    for files in csv_dupes.values():
        print(f"  - {', '.join(files)}")

print("\n==== CHECK ====")
# Positive: the sheet has more unique names than there are files;
# negative: the other way round.
diff = len(sheet_keys) - len(csv_keys)
if missing_count == 0 and extra_count == 0 and diff == 0:
    print("✅ Counts match (by unique cleaned names).")
elif diff > 0:
    print(
        f"❌ Likely missing ~{diff} file(s) (by unique cleaned names). See 'Missing CSVs' above.")
elif diff < 0:
    print(
        f"⚠️ There are ~{-diff} extra file(s) (by unique cleaned names). See 'Extra CSV files' above.")
else:
    print("ℹ️ Same unique counts but mismatches exist due to duplicates or naming. See details above.")


==== SUMMARY ====
Sheet rows (non-empty 'University Name'): 349
Sheet unique (after cleaning):          349
CSV files in 'Downloaded_Universities':             349
CSV unique (after cleaning):             349
Matched (by cleaned name):               349

==== CHECK ====
✅ Counts match (by unique cleaned names).


In [18]:
import os
import re
import pandas as pd
import gspread
from gspread_dataframe import get_as_dataframe

# ---------------------------
# Configurations
# ---------------------------
# Credentials file passed to gspread.service_account().
SERVICE_ACCOUNT_FILE = "service_account.json"
# Key of the spreadsheet opened with open_by_key().
SHEET_ID = "1eYz8Nvr3BToRrmReXNLR8zQrk4X8tsdKZO_Fj9mNThc"
# Worksheet (tab) that is read and then overwritten with the results.
SHEET_NAME = "Scrapper Running"
# Folder expected to contain one "<University Name>.csv" per sheet row.
OUTPUT_DIR = "Downloaded_Universities"

# ---------------------------
# Helpers
# ---------------------------


def clean_name(name: str) -> str:
    """Reduce *name* to its lowercase ASCII letters and digits.

    Non-string input yields "". Everything except a-z and 0-9 (after
    lowercasing) is removed, including spaces and punctuation.
    """
    if isinstance(name, str):
        lowered = name.lower().strip()
        return re.sub(r"[^a-z0-9]", "", lowered)
    return ""


def safe_filename(name: str) -> str:
    """Turn a university name into the stem of its CSV file name.

    Deletes dots, apostrophes, angle brackets, colons, double quotes,
    slashes, backslashes, pipes, question marks and asterisks, then strips
    surrounding whitespace. Case and inner spaces are kept.
    """
    pattern = re.compile(r'[.\'<>:"/\\|?*]')
    cleaned = pattern.sub('', name)
    return cleaned.strip()


def is_link(value: str) -> bool:
    """Tell whether *value* looks like an http(s) URL.

    Missing values (per ``pd.isna``) are not links. Everything else is
    converted to text, stripped, and must start with ``http://`` or
    ``https://`` (lowercase).
    """
    if pd.isna(value):
        return False
    candidate = str(value).strip()
    return re.match(r'^https?://', candidate) is not None


def read_csv_with_fallback(file_path: str) -> pd.DataFrame | None:
    """Try to read CSV with multiple encodings safely."""
    encodings = ["utf-8", "latin1", "cp1252"]
    for enc in encodings:
        try:
            return pd.read_csv(file_path, encoding=enc)
        except Exception:
            continue
    return None


def is_valid_csv(file_path: str) -> bool:
    """Check that a CSV is readable, wide enough and starts with links.

    A file is valid when ``read_csv_with_fallback`` can load it, it has more
    than three columns, and every value in its first column passes
    ``is_link``.

    Args:
        file_path: Path of the CSV file to check.

    Returns:
        True when all checks pass; False otherwise, including when an
        unexpected error occurs (the error is printed, not raised).
    """
    try:
        df = read_csv_with_fallback(file_path)
        if df is None:
            return False
        # Must have more than 3 columns
        if df.shape[1] <= 3:
            return False
        # The header line is already consumed by read_csv, so every row of
        # the frame is data. Slicing with iloc[1:] here would leave the
        # first data row unchecked.
        first_col = df.iloc[:, 0]
        return all(is_link(v) for v in first_col)
    except Exception as e:
        # Validation is best-effort: report and treat the file as invalid.
        print(f"⚠️ Error validating {file_path}: {e}")
        return False


# ---------------------------
# Load Google Sheet
# ---------------------------
# Open the worksheet and pull it into a DataFrame.
gc = gspread.service_account(filename=SERVICE_ACCOUNT_FILE)
sh = gc.open_by_key(SHEET_ID)
ws = sh.worksheet(SHEET_NAME)
df_sheet = get_as_dataframe(ws, evaluate_formulas=True, header=0)

# The result columns receive both text ("Yes", "") and numbers. Holding them
# as object dtype avoids pandas' deprecated implicit upcast when a string is
# written into a numeric (e.g. all-NaN float) column.
for result_col in ("CSV OK?", "Courses Count"):
    if result_col in df_sheet.columns:
        df_sheet[result_col] = df_sheet[result_col].astype(object)

# ---------------------------
# Process CSVs and update sheet
# ---------------------------
for i, row in df_sheet.iterrows():
    uni_name = str(row["University Name"]).strip()
    # Blank cells arrive as NaN, which str() turns into "nan".
    if not uni_name or uni_name == "nan":
        continue

    csv_filename = safe_filename(uni_name) + ".csv"
    csv_path = os.path.join(OUTPUT_DIR, csv_filename)

    if not os.path.exists(csv_path):
        df_sheet.at[i, "CSV OK?"] = "Missing"
        df_sheet.at[i, "Courses Count"] = ""
        continue

    df_csv = read_csv_with_fallback(csv_path) if is_valid_csv(csv_path) else None
    if df_csv is None:
        # The file exists but is unreadable or failed validation. Previously
        # a failed validation wrote nothing and left whatever the sheet
        # already said; "Corrupt" is the only failure label defined for an
        # existing file.
        # NOTE(review): confirm "Corrupt" is the wanted label for CSVs that
        # are readable but invalid (too few columns / non-link first column).
        df_sheet.at[i, "CSV OK?"] = "Corrupt"
        df_sheet.at[i, "Courses Count"] = ""
        continue

    df_sheet.at[i, "CSV OK?"] = "Yes"
    # read_csv already consumed the header line, so the frame length is the
    # number of courses; subtracting one would under-count.
    df_sheet.at[i, "Courses Count"] = len(df_csv)

# ---------------------------
# Push back to Google Sheet (header row followed by all data rows)
# ---------------------------
ws.update([df_sheet.columns.values.tolist()] +
          df_sheet.fillna("").values.tolist())
print("✅ Sheet updated with CSV OK? and Courses Count")

✅ Sheet updated with CSV OK? and Courses Count



🚀 Running spider for: Abertay University.csv
❌ Spider failed for Abertay University.csv: Command '['c:\\Users\\LENOVO\\AppData\\Local\\Programs\\Python\\Python313\\python.exe', '-m', 'scrapy', 'crawl', 'courses', '-a', 'csv_file=Downloaded_Universities\\Abertay University.csv']' returned non-zero exit status 2.

🚀 Running spider for: Aberystwyth University.csv
❌ Spider failed for Aberystwyth University.csv: Command '['c:\\Users\\LENOVO\\AppData\\Local\\Programs\\Python\\Python313\\python.exe', '-m', 'scrapy', 'crawl', 'courses', '-a', 'csv_file=Downloaded_Universities\\Aberystwyth University.csv']' returned non-zero exit status 2.

🚀 Running spider for: Acadia University.csv
❌ Spider failed for Acadia University.csv: Command '['c:\\Users\\LENOVO\\AppData\\Local\\Programs\\Python\\Python313\\python.exe', '-m', 'scrapy', 'crawl', 'courses', '-a', 'csv_file=Downloaded_Universities\\Acadia University.csv']' returned non-zero exit status 2.

🚀 Running spider for: Alberta University of the 

KeyboardInterrupt: 