In [None]:
import csv
import re
import time
from datetime import datetime
import requests

def multi_range(*ranges):
    for r in ranges:
        yield from range(*r)

# Define numbers for sports
range1 = (151, 161)
range2 = (350, 357)

# Use the custom_range generator to combine the ranges
multi_range = multi_range(range1, range2)

# Function to adjust year based on month if it is a split year season
def adjust_year(year_str, month_str):
    if '-' in year_str:
        year_range = year_str.split('-')
        year1, year2 = map(int, year_range)
        if int(month_str) > 5:  # If month is June or later, earlier year
            year_str = str(year1)
        else:
            year_str = str(year2+2000)  # Return the second year if before June
    return year_str

# Initialize a list to store all rows
all_data_rows = []

# URL of the resource
for gamecode in multi_range:
    url = f"https://gophersports.com/services/schedule_txt.ashx?schedule={gamecode}"
    print(gamecode)
    # Retry until text content is fetched successfully, provides lots of errors
    while True:
        # Send a request to the URL using requests.get
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Split the text content by newline to get each line
            lines = response.text.split("\n")
            # Extract year and season from the second row
            second_row = lines[1].strip()
            year_str_match = re.match(r'(\d{4}-\d{2}|\d{4})', second_row) #Looks for a year value within year and season
            year_str = year_str_match.group(1) if year_str_match else None #Looks for year_str to match
            # Remove Schedulefrom the end of the season column, just names sports. 
            season = second_row.replace("Schedule", "").strip()
            # Check if the season contains specific sports (might have to filter out by gender)
            if any(sport in season for sport in ["Football", "Volleyball", "Basketball", "Hockey"]):
                # Process each line and split it based on multiple spaces using regular expression
                for line in lines[10:]:  # Skip the first 10 rows
                    # Strip whitespace and split the line based on multiple spaces
                    data = re.split(r'\s{2,}', line.strip())
                    # Check if the row has more than 2 elements and the third element is not "Away" or "Neutral"
                    if len(data) > 2 and data[2] not in ["Away", "Neutral"]:
                        # Remove the last element from the row if the row contains more than three elements
                        if len(data) > 3:
                            data = data[:-1]
                        # Append the split data to data_rows with the year and season values added
                        if data[0] != "Date" or data[1] != "Time":  # Add this condition to filter out rows with "Date" and "Time"
                            # Extracting only the date part without the day of the week
                            date_without_day = re.sub(r'\s*\([^)]*\)', '', data[0])
                            # Extract month from the date
                            month_str = datetime.strptime(date_without_day, "%b %d").strftime("%m")
                            # Adjust the year based on month for this row
                            year_str_for_row = adjust_year(year_str, month_str)
                            # Combine date with year and format it as "mm/dd/yyyy"
                            combined_date = datetime.strptime(f"{date_without_day} {year_str_for_row}", "%b %d %Y").strftime("%m/%d/%Y")
                            data[0] = combined_date
                            all_data_rows.append([year_str_for_row, season] + data)

            break  # Exit the loop if data is fetched successfully
        else:
            # Print an error message if the request was not successful
            print(f"Error: {response.status_code} at {url}")

        # Wait for 1 seconds before retrying
        time.sleep(1)
    
# Write to CSV
output_file = f"combined_schedule.csv"
with open(output_file, "w", newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write header row
    writer.writerow(["Year", "Season", "Date", "Time", "At", "Opponent", "Location"])
    # Write data rows
    writer.writerows(all_data_rows)

print(f"Data written to '{output_file}'")


In [None]:
####READY 
import requests
import csv
import re
import time
from datetime import datetime

# Initialize a list to store all data rows
all_data_rows = []

# URL of the resource
sport_list = list(range(150, 152))
for gamecode in sport_list:
    url = f"https://gophersports.com/services/schedule_txt.ashx?schedule={gamecode}"

    # Retry until text content is fetched successfully
    while True:
        # Send a GET request to the URL
        response = requests.get(url)

        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            # Split the text content by newline to get each line
            lines = response.text.split("\n")

            # Extract the year and season from the second row
            second_row = lines[1].strip()
            year = second_row[:4]
            season = second_row[4:].strip()

            # Remove "Schedule" from the end of the season column
            season = season.replace("Schedule", "").strip()

            # Initialize a list to store data rows for this iteration
            data_rows = []

            # Process each line and split it based on multiple spaces using regular expression
            for line in lines[10:]:  # Skip the first 10 rows
                # Strip whitespace and split the line based on multiple spaces
                data = re.split(r'\s{2,}', line.strip()) 
                # Check if the row has more than 3 elements and the third element is not "Away" or "Neutral"
                if len(data) > 2 and data[2] not in ["Away", "Neutral"]:
                    # Remove the last element from the row if the row contains more than three elements
                    if len(data) > 3:
                        data = data[:-1]
                    # Append the split data to data_rows with the year and season values added
                    if data[0] != "Date" or data[1] != "Time":  # Add this condition to filter out rows with "Date" and "Time"
                        # Extracting only the date part without the day of the week
                        date_without_day = re.sub(r'\s*\([^)]*\)', '', data[0])
                        # Combine date with year and format it as "mm/dd/yyyy"
                        combined_date = datetime.strptime(f"{date_without_day} {year}", "%b %d %Y").strftime("%m/%d/%Y")
                        data[0] = combined_date
                        data_rows.append([year, season] + data)

            # Extend the all_data_rows list with the data rows for this iteration
            all_data_rows.extend(data_rows)

            break  # Exit the loop if data is fetched successfully
        else:
            # Print an error message if the request was not successful
            print(f"Error: {response.status_code} at {url}")

        # Wait for 5 seconds before retrying
        time.sleep(1)

# Write all data rows to a CSV file
output_file = "combined_schedule_with_year_and_season.csv"
with open(output_file, "w", newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write header row
    writer.writerow(["Year", "Season", "Date", "Time", "At", "Opponent", "Location"])
    # Write data rows
    writer.writerows(all_data_rows)

print(f"Data written to '{output_file}'")


In [None]:
###SHOULD BE READY TO GO
import csv
import re
import time
from datetime import datetime
import requests

# Function to adjust year based on month
def adjust_year(year_str, month_str):
    if '-' in year_str:
        year_range = year_str.split('-')
        year1, year2 = map(int, year_range)
        if int(month_str) > 5:  # Adjust year if month is June or later
            year_str = str(year1)  # Return the first year
        else:
            year_str = str(year2+2000)  # Return the second year
        print(month_str)
        print(year_str)
    return year_str

# Initialize a list to store all data rows
all_data_rows = []

# URL of the resource
sport_list = list(range(175, 178))
for gamecode in sport_list:
    url = f"https://gophersports.com/services/schedule_txt.ashx?schedule={gamecode}"

    # Retry until text content is fetched successfully
    while True:
        # Send a GET request to the URL
        response = requests.get(url)

        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            # Split the text content by newline to get each line
            lines = response.text.split("\n")

            # Extract the year and season from the second row
            second_row = lines[1].strip()
            year_str_match = re.match(r'(\d{4}-\d{2}|\d{4})', second_row)
            year_str = year_str_match.group(1) if year_str_match else None

            # Remove "Schedule" from the end of the season column
            season = second_row.replace("Schedule", "").strip()

            # Process each line and split it based on multiple spaces using regular expression
            for line in lines[10:]:  # Skip the first 10 rows
                # Strip whitespace and split the line based on multiple spaces
                data = re.split(r'\s{2,}', line.strip())
                # Check if the row has more than 3 elements and the third element is not "Away" or "Neutral"
                if len(data) > 2 and data[2] not in ["Away", "Neutral"]:
                    # Remove the last element from the row if the row contains more than three elements
                    if len(data) > 3:
                        data = data[:-1]
                    # Append the split data to data_rows with the year and season values added
                    if data[0] != "Date" or data[1] != "Time":  # Add this condition to filter out rows with "Date" and "Time"
                        # Extracting only the date part without the day of the week
                        date_without_day = re.sub(r'\s*\([^)]*\)', '', data[0])
                        # Extract month from the date
                        month_str = datetime.strptime(date_without_day, "%b %d").strftime("%m")
                        # Adjust the year based on month for this row
                        year_str_for_row = adjust_year(year_str, month_str)
                        # Combine date with year and format it as "mm/dd/yyyy"
                        combined_date = datetime.strptime(f"{date_without_day} {year_str_for_row}", "%b %d %Y").strftime("%m/%d/%Y")
                        data[0] = combined_date
                        all_data_rows.append([year_str_for_row, season] + data)

            break  # Exit the loop if data is fetched successfully
        else:
            # Print an error message if the request was not successful
            print(f"Error: {response.status_code} at {url}")

        # Wait for 5 seconds before retrying
        time.sleep(0.5)

# Write all data rows to a CSV file
output_file = "combined_schedule_with_year_and_season.csv"
with open(output_file, "w", newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write header row
    writer.writerow(["Year", "Season", "Date", "Time", "At", "Opponent", "Location"])
    # Write data rows
    writer.writerows(all_data_rows)

print(f"Data written to '{output_file}'")


In [17]:
####USE THIS IS PERFECT
import csv
import re
import time
from datetime import datetime
import requests

# Function to adjust year based on month if it is a split year season
def adjust_year(year_str, month_str):
    if '-' in year_str:
        year_range = year_str.split('-')
        year1, year2 = map(int, year_range)
        if int(month_str) > 5:  #If month is June or later, earlier year
            year_str = str(year1)
        else:
            year_str = str(year2+2000)  # Return the second year if before June
    return year_str

# Initialize a list to store all rows
all_data_rows = []

# URL of the resource
sport_list = list(range(170, 185))
for gamecode in sport_list:
    url = f"https://gophersports.com/services/schedule_txt.ashx?schedule={gamecode}"

    # Retry until text content is fetched successfully, provides lots of errors
    while True:
        # Send a request to the URL
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Split the text content by newline to get each line
            lines = response.text.split("\n")
            # Extract year and season from the second row
            second_row = lines[1].strip()
            year_str_match = re.match(r'(\d{4}-\d{2}|\d{4})', second_row) #Looks for a year value within year and season
            year_str = year_str_match.group(1) if year_str_match else None #Looks for year_str to match
            # Remove Schedulefrom the end of the season column, just names sports. 
            season = second_row.replace("Schedule", "").strip()
            # Check if the season contains specific sports (might have to filter out by gender)
            if any(sport in season for sport in ["Football", "Volleyball", "Basketball", "Hockey"]):
                # Process each line and split it based on multiple spaces using regular expression
                for line in lines[10:]:  # Skip the first 10 rows
                    # Strip whitespace and split the line based on multiple spaces
                    data = re.split(r'\s{2,}', line.strip())
                    # Check if the row has more than 2 elements and the third element is not "Away" or "Neutral"
                    if len(data) > 2 and data[2] not in ["Away", "Neutral"]:
                        # Remove the last element from the row if the row contains more than three elements
                        if len(data) > 3:
                            data = data[:-1]
                        # Append the split data to data_rows with the year and season values added
                        if data[0] != "Date" or data[1] != "Time":  # Add this condition to filter out rows with "Date" and "Time"
                            # Extracting only the date part without the day of the week
                            date_without_day = re.sub(r'\s*\([^)]*\)', '', data[0])
                            # Extract month from the date
                            month_str = datetime.strptime(date_without_day, "%b %d").strftime("%m")
                            # Adjust the year based on month for this row
                            year_str_for_row = adjust_year(year_str, month_str)
                            # Combine date with year and format it as "mm/dd/yyyy"
                            combined_date = datetime.strptime(f"{date_without_day} {year_str_for_row}", "%b %d %Y").strftime("%m/%d/%Y")
                            data[0] = combined_date
                            all_data_rows.append([year_str_for_row, season] + data)

            break  # Exit the loop if data is fetched successfully
        else:
            # Print an error message if the request was not successful
            print(f"Error: {response.status_code} at {url}")

        # Wait for 5 seconds before retrying
        time.sleep(0.1)

# Write all data rows to a CSV file
output_file = "combined_schedule_with_year_and_season_filtered.csv"
with open(output_file, "w", newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write header row
    writer.writerow(["Year", "Season", "Date", "Time", "At", "Opponent", "Location"])
    # Write data rows
    writer.writerows(all_data_rows)

print(f"Data written to '{output_file}'")


Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=170
Error: 404 at https://gophersports.com/services/sche

KeyboardInterrupt: 

In [11]:
import csv
import re
import time
from datetime import datetime
import requests

code1 = 154
code2 = 200
# Function to adjust year based on month if it is a split year season
def adjust_year(year_str, month_str):
    if '-' in year_str:
        year_range = year_str.split('-')
        year1, year2 = map(int, year_range)
        if int(month_str) > 5:  # If month is June or later, earlier year
            year_str = str(year1)
        else:
            year_str = str(year2+2000)  # Return the second year if before June
    return year_str

# Initialize a list to store all rows
all_data_rows = []

# URL of the resource
sport_list = list(range(code1, code2))
loop_count = 0
for gamecode in sport_list:
    url = f"https://gophersports.com/services/schedule_txt.ashx?schedule={gamecode}"
    print(gamecode)
    # Retry until text content is fetched successfully, provides lots of errors
    while True:
        # Send a request to the URL
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Split the text content by newline to get each line
            lines = response.text.split("\n")
            # Extract year and season from the second row
            second_row = lines[1].strip()
            year_str_match = re.match(r'(\d{4}-\d{2}|\d{4})', second_row) #Looks for a year value within year and season
            year_str = year_str_match.group(1) if year_str_match else None #Looks for year_str to match
            # Remove Schedulefrom the end of the season column, just names sports. 
            season = second_row.replace("Schedule", "").strip()
            # Check if the season contains specific sports (might have to filter out by gender)
            if any(sport in season for sport in ["Football", "Volleyball", "Basketball", "Hockey"]):
                # Process each line and split it based on multiple spaces using regular expression
                for line in lines[10:]:  # Skip the first 10 rows
                    # Strip whitespace and split the line based on multiple spaces
                    data = re.split(r'\s{2,}', line.strip())
                    # Check if the row has more than 2 elements and the third element is not "Away" or "Neutral"
                    if len(data) > 2 and data[2] not in ["Away", "Neutral"]:
                        # Remove the last element from the row if the row contains more than three elements
                        if len(data) > 3:
                            data = data[:-1]
                        # Append the split data to data_rows with the year and season values added
                        if data[0] != "Date" or data[1] != "Time":  # Add this condition to filter out rows with "Date" and "Time"
                            # Extracting only the date part without the day of the week
                            date_without_day = re.sub(r'\s*\([^)]*\)', '', data[0])
                            # Extract month from the date
                            month_str = datetime.strptime(date_without_day, "%b %d").strftime("%m")
                            # Adjust the year based on month for this row
                            year_str_for_row = adjust_year(year_str, month_str)
                            # Combine date with year and format it as "mm/dd/yyyy"
                            combined_date = datetime.strptime(f"{date_without_day} {year_str_for_row}", "%b %d %Y").strftime("%m/%d/%Y")
                            data[0] = combined_date
                            all_data_rows.append([year_str_for_row, season] + data)

            break  # Exit the loop if data is fetched successfully
        else:
            # Print an error message if the request was not successful
            print(f"Error: {response.status_code} at {url}")

        # Wait for 5 seconds before retrying
        time.sleep(0.5)
    
    # Write to CSV every loop
    loop_count += 1
    if loop_count % 1 == 0:
        output_file = f"combined_schedule_{code1}_{gamecode}.csv"
        with open(output_file, "w", newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            # Write header row
            writer.writerow(["Year", "Season", "Date", "Time", "At", "Opponent", "Location"])
            # Write data rows
            writer.writerows(all_data_rows)

        print(f"Data written to '{output_file}'")
        # Clear all_data_rows after writing
        #all_data_rows = []


154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=154
Error: 404 at https://gophersports.com/services/

KeyboardInterrupt: 

## Get Football Games Using Curl

In [13]:
###Uses curl and works a bit better .Might need to split up by sport and have a smaller range of codes
import csv
import re
import time
from datetime import datetime
import requests

def multi_range(*ranges):
    for r in ranges:
        yield from range(*r)

# Define numbers for sports
range1 = (151, 161)
range2 = (350, 357)

# Use the custom_range generator to combine the ranges
multi_range = multi_range(range1, range2)

# Function to adjust year based on month if it is a split year season
def adjust_year(year_str, month_str):
    if '-' in year_str:
        year_range = year_str.split('-')
        year1, year2 = map(int, year_range)
        if int(month_str) > 5:  # If month is June or later, earlier year
            year_str = str(year1)
        else:
            year_str = str(year2+2000)  # Return the second year if before June
    return year_str

# Initialize a list to store all rows
all_data_rows = []

# URL of the resource
loop_count = 0
for gamecode in multi_range:
    url = f"https://gophersports.com/services/schedule_txt.ashx?schedule={gamecode}"
    print(gamecode)
    # Retry until text content is fetched successfully, provides lots of errors
    while True:
        # Send a request to the URL using requests.get
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Split the text content by newline to get each line
            lines = response.text.split("\n")
            # Extract year and season from the second row
            second_row = lines[1].strip()
            year_str_match = re.match(r'(\d{4}-\d{2}|\d{4})', second_row) #Looks for a year value within year and season
            year_str = year_str_match.group(1) if year_str_match else None #Looks for year_str to match
            # Remove Schedulefrom the end of the season column, just names sports. 
            season = second_row.replace("Schedule", "").strip()
            # Check if the season contains specific sports (might have to filter out by gender)
            if any(sport in season for sport in ["Football", "Volleyball", "Basketball", "Hockey"]):
                # Process each line and split it based on multiple spaces using regular expression
                for line in lines[10:]:  # Skip the first 10 rows
                    # Strip whitespace and split the line based on multiple spaces
                    data = re.split(r'\s{2,}', line.strip())
                    # Check if the row has more than 2 elements and the third element is not "Away" or "Neutral"
                    if len(data) > 2 and data[2] not in ["Away", "Neutral"]:
                        # Remove the last element from the row if the row contains more than three elements
                        if len(data) > 3:
                            data = data[:-1]
                        # Append the split data to data_rows with the year and season values added
                        if data[0] != "Date" or data[1] != "Time":  # Add this condition to filter out rows with "Date" and "Time"
                            # Extracting only the date part without the day of the week
                            date_without_day = re.sub(r'\s*\([^)]*\)', '', data[0])
                            # Extract month from the date
                            month_str = datetime.strptime(date_without_day, "%b %d").strftime("%m")
                            # Adjust the year based on month for this row
                            year_str_for_row = adjust_year(year_str, month_str)
                            # Combine date with year and format it as "mm/dd/yyyy"
                            combined_date = datetime.strptime(f"{date_without_day} {year_str_for_row}", "%b %d %Y").strftime("%m/%d/%Y")
                            data[0] = combined_date
                            all_data_rows.append([year_str_for_row, season] + data)

            break  # Exit the loop if data is fetched successfully
        else:
            # Print an error message if the request was not successful
            print(f"Error: {response.status_code} at {url}")

        # Wait for 5 seconds before retrying
        time.sleep(1)
    
    # Write to CSV
    loop_count += 1
    if loop_count % 20 == 0:
        output_file = f"combined_schedule_{code1}_{gamecode}.csv"
        with open(output_file, "w", newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            # Write header row
            writer.writerow(["Year", "Season", "Date", "Time", "At", "Opponent", "Location"])
            # Write data rows
            writer.writerows(all_data_rows)

        print(f"Data written to '{output_file}'")
        # Clear all_data_rows after writing
        #all_data_rows = []


151
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=151
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=151
Error: 404 at https://gophersports.com/services/schedule_txt.ashx?schedule=151


KeyboardInterrupt: 

In [9]:
ranges = [(151, 161), (350, 357)]

for i in ranges:
    print(i)

(151, 161)
(350, 357)
