## CSV & XLSX

In [None]:
# Loading delimited text (CSV) files
import pandas as pd

# read_csv parses the file into a DataFrame; preview the first five rows.
df = pd.read_csv(filepath_or_buffer="data.csv")
print(df.head(n=5))

# Loading Excel workbooks (.xls / .xlsx)
# sheet_name selects which worksheet of the workbook to load.
df = pd.read_excel(io="data.xlsx", sheet_name="Sheet1")
print(df.head(n=5))




## JSON File 

In [None]:
# Dealing with JSON
import json

from pandas import json_normalize

# JSON that already maps onto rows/columns can be read straight into a DataFrame.
df = pd.read_json("data.json")
print(df.head())

# If the JSON file has nested data, parse it with the json module and
# flatten it with json_normalize instead.
# JSON text is UTF-8, so decode explicitly rather than relying on the
# platform's default encoding (which is not UTF-8 on every system).
with open("data.json", encoding="utf-8") as file:
    data = json.load(file)
df = json_normalize(data)  # Nested keys become dot-separated column names
print(df.head())  # Preview the flattened result, as every other snippet does


## Parquet & PKL

In [None]:
# Dealing with Parquet
# Parquet is efficient for large datasets.
df = pd.read_parquet(path="data.parquet")
print(df.head(n=5))

# Dealing with PKL (pickled pandas objects)
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source.
df = pd.read_pickle("data.pkl")
print(df.head(n=5))

## TXT Files

In [None]:
# Dealing with plain-text files (.txt, .dat, .log)

# read_csv handles any delimited text; the fields here are tab-separated,
# so change sep if the file uses a different delimiter.
df = pd.read_csv(filepath_or_buffer="data.txt", sep="\t")
print(df.head(n=5))


## XML File

In [None]:
# Dealing with XML files

import xml.etree.ElementTree as ET

tree = ET.parse("data.xml")
root = tree.getroot()

# Convert XML to DataFrame: every <row> element below the root becomes one
# record, mapping each child element's tag to its text content.
data = [
    {field.tag: field.text for field in row}
    for row in root.findall(".//row")
]

df = pd.DataFrame(data)
print(df.head(n=5))

## HTML & HDF5

In [None]:
# Dealing with HTML tables from the web (.html)
# read_html returns a list with one DataFrame per table found on the page.
df_list = pd.read_html(io="https://example.com")
df = df_list[0]  # Keep only the first table
print(df.head(n=5))


# Dealing with HDF5 files (.h5)
# key names the object stored inside the HDF5 file that should be loaded.
df = pd.read_hdf(path_or_buf="data.h5", key="df")
print(df.head(n=5))

## SAS File and SQL File

In [None]:
# Dealing with SQL Database

from contextlib import closing
import sqlite3

import pandas as pd

# Connect to SQLite, run the query, and always release the connection.
# sqlite3's own `with conn:` only commits or rolls back the transaction and
# does not close the connection, so closing() is used to guarantee the
# close even if the query raises.
with closing(sqlite3.connect("database.db")) as conn:
    df = pd.read_sql_query("SELECT * FROM table_name", conn)
print(df.head())


# Dealing with SAS File (.sas7bdat)

df = pd.read_sas("data.sas7bdat")
print(df.head())




## DTA File

In [None]:
# Dealing with Stata files (.dta)
# read_stata loads the Stata dataset into a DataFrame; preview the first five rows.
df = pd.read_stata(filepath_or_buffer="data.dta")
print(df.head(n=5))