In [1]:
import pandas as pd

In [2]:
# Load the local CSV into a DataFrame; the path is relative to the
# directory the notebook kernel is running in.
df = pd.read_csv('Datasets/aug_train.csv')

## Opening a CSV file from a URL

In [4]:
import requests
from io import StringIO
from requests.exceptions import HTTPError, Timeout, RequestException

def load_csv_from_url(url, delimiter=',', encoding='utf-8', timeout=10, headers=None):
    """
    Download a CSV file over HTTP(S) and parse it into a pandas DataFrame.

    The raw response bytes are decoded with the caller-supplied ``encoding``.
    (Decoding via ``response.text`` would instead use whatever charset
    ``requests`` infers from the response, and pandas does not apply its
    ``encoding`` argument to an already-decoded text buffer, so the argument
    would otherwise have no effect.)

    Every failure is reported with ``print`` and signalled by returning None;
    no exception propagates to the caller.

    Parameters:
    - url: str, the URL to the CSV file.
    - delimiter: str, the delimiter used in the CSV file (default is comma).
    - encoding: str, the text encoding used to decode the downloaded bytes
      (default is 'utf-8').
    - timeout: int, the time (in seconds) to wait for a response from the server (default is 10).
    - headers: dict, HTTP headers to include with the request. When None, a
      browser-like User-Agent header is sent.

    Returns:
    - df: DataFrame, the loaded DataFrame, or None if an error occurred.
    """
    # Fall back to a browser-like User-Agent when the caller supplies no headers.
    if headers is None:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}

    try:
        # Fetch the file; raise_for_status() turns 4xx/5xx responses into HTTPError.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # Decode the raw bytes ourselves so that `encoding` is honoured.
        data = StringIO(response.content.decode(encoding))

        # The buffer is already text, so no encoding is passed to pandas.
        df = pd.read_csv(data, delimiter=delimiter)

        print(f"Successfully loaded CSV from {url}")
        return df

    except HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Timeout as timeout_err:
        print(f"Request timed out: {timeout_err}")
    except UnicodeDecodeError as decode_err:
        # The downloaded bytes are not valid in the requested encoding.
        print(f"Error decoding response with encoding '{encoding}': {decode_err}")
    except pd.errors.ParserError as parse_err:
        print(f"Error parsing CSV: {parse_err}. Check if the delimiter '{delimiter}' or encoding '{encoding}' is correct.")
    except pd.errors.EmptyDataError:
        print("No data: The file is empty or all data is NaN.")
    except RequestException as req_err:
        print(f"Error during requests to {url}: {req_err}")
    except Exception as err:
        # Deliberate best-effort catch-all (e.g. an unknown encoding name):
        # report the problem and fall through to return None.
        print(f"An unexpected error occurred: {err}")

    return None

In [6]:
# Sample CSV hosted on GitHub (raw file URL).
url = 'https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv'
# NOTE: this rebinds `df`, replacing the frame read from
# 'Datasets/aug_train.csv' earlier in the notebook. load_csv_from_url
# returns None on failure, so `df` may be None here.
df = load_csv_from_url(url)
# Bare last expression so the notebook renders the result.
df

Successfully loaded CSV from https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv


Unnamed: 0,Country,Region
0,Algeria,AFRICA
1,Angola,AFRICA
2,Benin,AFRICA
3,Botswana,AFRICA
4,Burkina,AFRICA
...,...,...
189,Paraguay,SOUTH AMERICA
190,Peru,SOUTH AMERICA
191,Suriname,SOUTH AMERICA
192,Uruguay,SOUTH AMERICA
