In [43]:
import pandas as pd
import numpy as np
from datetime import datetime

# Create DataFrame from Numpy Array

In [9]:
data = np.array([[1,4],[2,5],[3,6]])

In [12]:
df = pd.DataFrame(data, index=['row1', 'row2', 'row3'], columns=['col1', 'col2'])

In [13]:
df

Unnamed: 0,col1,col2
row1,1,4
row2,2,5
row3,3,6


In [None]:
data = [[1,4],[2,5],[3,6]]

In [None]:
df = pd.DataFrame(data, index=['row1', 'row2', 'row3'], columns=['col1', 'col2'])

In [14]:
df

Unnamed: 0,col1,col2
row1,1,4
row2,2,5
row3,3,6


# Create DataFrame from dictionary

In [21]:
# Lists
states = ['California', 'Texas', 'Florida', 'New York']
population = [1111111, 2222222, 3333333, 44444444]

In [22]:
# Storing lists within a dictionary
dict_states = {'States':states, 'Population': population}

In [27]:
# Creating the dataframe
df_pop = pd.DataFrame(dict_states)

In [28]:
df_pop

Unnamed: 0,States,Population
0,California,1111111
1,Texas,2222222
2,Florida,3333333
3,New York,44444444


In [29]:
df_pop['States'].index

RangeIndex(start=0, stop=4, step=1)

In [31]:
# select column with .
df_pop.States

0    California
1         Texas
2       Florida
3      New York
Name: States, dtype: object

In [32]:
df_pop['States']


0    California
1         Texas
2       Florida
3      New York
Name: States, dtype: object

In [33]:
df_pop.sort_values(['States'], ascending=False, inplace=True)


In [34]:
df_pop

Unnamed: 0,States,Population
1,Texas,2222222
3,New York,44444444
2,Florida,3333333
0,California,1111111


In [35]:
df_pop.sort_values(['States'], ascending=True, key=lambda col:col.str.lower())

Unnamed: 0,States,Population
0,California,1111111
2,Florida,3333333
3,New York,44444444
1,Texas,2222222


In [36]:
df_pop


Unnamed: 0,States,Population
1,Texas,2222222
3,New York,44444444
2,Florida,3333333
0,California,1111111


# Web Scraping with Panda

Target website: https://www.iso-ne.com/static-transform/csv/histRpts/rt-energy-offer/hbrealtimeenergyoffer_20250228.csv

In [39]:
# Try specifying the separator and skipping initial rows that might contain metadata
pd.read_csv('https://www.iso-ne.com/static-transform/csv/histRpts/rt-energy-offer/hbrealtimeenergyoffer_20250228.csv', 
            sep=',',      # Explicitly set comma as separator (try other separators like '\t', ';' if needed)
            skiprows=4)   # Skip the first 4 rows which might contain metadata
            
# Alternative approach - try to detect the separator automatically
# pd.read_csv('https://www.iso-ne.com/static-transform/csv/histRpts/rt-energy-offer/hbrealtimeenergyoffer_20250228.csv',
#             sep=None,    # Let pandas try to detect the separator
#             engine='python')  # Use the python engine which is more flexible

Unnamed: 0,H,Day,Trading Interval,Masked Lead Participant ID,Masked Asset ID,Must Take Energy,Maximum Daily Energy Available,Economic Maximum,Economic Minimum,Cold Startup Price,...,Segment 8 Price,Segment 8 MW,Segment 9 Price,Segment 9 MW,Segment 10 Price,Segment 10 MW,Claim 10,Claim 30,Unit Status,Max Daily Award Limit
0,H,Date,String,Number,Number,Number,Number,Number,Number,$,...,$,MW,$,MW,$,MW,MW,MW,String,MW
1,D,02/28/2025,01,20721,88115,0,0.000,4.000,0.100,3.10,...,,,,,,,0.000,0.000,ECONOMIC,
2,D,02/28/2025,02,20721,88115,0,0.000,4.000,0.100,3.10,...,,,,,,,0.000,0.000,ECONOMIC,
3,D,02/28/2025,03,20721,88115,0,0.000,4.000,0.100,3.10,...,,,,,,,0.000,0.000,ECONOMIC,
4,D,02/28/2025,04,20721,88115,0,0.000,4.000,0.100,3.10,...,,,,,,,0.000,0.000,ECONOMIC,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10341,D,02/28/2025,21,989046,42750,0,0.000,0.000,0.000,0.00,...,,,,,,,0.000,0.000,MUST_RUN,
10342,D,02/28/2025,22,989046,42750,0,0.000,0.000,0.000,0.00,...,,,,,,,0.000,0.000,MUST_RUN,
10343,D,02/28/2025,23,989046,42750,0,0.000,0.000,0.000,0.00,...,,,,,,,0.000,0.000,MUST_RUN,
10344,D,02/28/2025,24,989046,42750,0,0.000,0.000,0.000,0.00,...,,,,,,,0.000,0.000,MUST_RUN,


Target Website: https://www.iso-ne.com/transform/csv/fiveminrcp?type=prelim&start=20250630&end=20250630


Target Website: https://www.iso-ne.com/static-transform/csv/histRpts/da-ard/hbdayaheadardbid_20250228.csv

In [45]:
# Try specifying the separator and skipping initial rows that might contain metadata
pd.read_csv('https://www.iso-ne.com/static-transform/csv/histRpts/da-ard/hbdayaheadardbid_20250228.csv', 
            sep=',',      # Explicitly set comma as separator (try other separators like '\t', ';' if needed)
            skiprows=4)   # Skip the first 4 rows which might contain metadata

Unnamed: 0,H,Day,Hour Ending,Masked Lead Participant ID,Masked Asset ID,Claim 10,Claim 30,Minimum Consumption,Maximum Consumption,Segment 1 Price,...,Segment 7 Price,Segment 7 MW,Segment 8 Price,Segment 8 MW,Segment 9 Price,Segment 9 MW,Segment 10 Price,Segment 10 MW,Unit Status,Max Daily Award Limit
0,H,Date,Number,Number,Number,Number,Number,Number,Number,$,...,$,MW,$,MW,$,MW,$,MW,String,MW
1,D,02/28/2025,01,148550,31628,0,0,0,5,-150,...,,,,,,,,,MUST_RUN,0.000
2,D,02/28/2025,02,148550,31628,0,0,0,5,-150,...,,,,,,,,,MUST_RUN,0.000
3,D,02/28/2025,03,148550,31628,0,0,0,5,-150,...,,,,,,,,,MUST_RUN,0.000
4,D,02/28/2025,04,148550,31628,0,0,0,5,-150,...,,,,,,,,,MUST_RUN,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,D,02/28/2025,21,978570,88183,0,0,0,0,0,...,,,,,,,,,UNAVAILABLE,0.000
526,D,02/28/2025,22,978570,88183,0,0,0,0,0,...,,,,,,,,,UNAVAILABLE,0.000
527,D,02/28/2025,23,978570,88183,0,0,0,0,0,...,,,,,,,,,UNAVAILABLE,0.000
528,D,02/28/2025,24,978570,88183,0,0,0,0,0,...,,,,,,,,,UNAVAILABLE,0.000


Target Website: https://www.iso-ne.com/static-assets/documents/100020/2025_prd_aggregation_zone_lmp_stats.xlsx

In [49]:
# Try specifying the separator and skipping initial rows that might contain metadata
pd.read_excel('https://www.iso-ne.com/static-assets/documents/100020/2025_prd_aggregation_zone_lmp_stats.xlsx', 
            skiprows=16)   # Skip the first 4 rows which might contain metadata

Unnamed: 0,Location Id,Location Name,Unnamed: 2
0,7600,CT_Eastern,
1,7601,CT_Northern,
2,7602,CT_Norwalk-Stamford,
3,7603,CT_Western_SWCT,
4,7604,CT_Western,
5,7605,ME_Bangor_Hydro,
6,7606,ME_Maine,
7,7607,ME_Portland,
8,7608,MA_Boston,
9,7609,MA_North_Shore,


In [54]:
# This code:
# 1. Uses `--local` flag for git config commands to only set configuration for the current repository
# 2. Doesn't modify your global Git settings
# 3. Allows you to specify different credentials for different GitHub accounts
# 4. Stores your Git user info in variables within the notebook

# This approach is better when working with multiple GitHub accounts 
# as it keeps the configuration isolated to just this repository.


# Install required packages if not already installed
!pip install -q nbgitpuller jupyterlab-git

# Import necessary libraries
import os
import getpass
import subprocess

# Define your Git user info for this specific repository
git_name = "Your Name"
git_email = "your.email@example.com"

# Initialize Git repository (if not already initialized)
!git init

# Set local Git configuration for this repository only
!git config --local user.name "buffaloo"
!git config --local user.email "buffaloo37@hotmail.com"

# Add your notebook file (replace 'YourNotebook.ipynb' with your actual filename)
!git add panda-webscrap-ipp.ipynb

# Commit your changes
!git commit -m "Add Jupyter notebook"

# Create a new repository on GitHub first, then connect your local repo
# Replace the URL with your GitHub repository URL
repo_url = "https://github.com/agentic-digital-twins/python-jupyter.git"
!git remote add origin {repo_url}

# Push to GitHub (you'll be prompted for credentials)
print("Enter your GitHub username for this specific account:")
username = input()
print("Enter your GitHub personal access token (will be hidden):")
token = getpass.getpass()

# Set the remote URL with credentials
remote_with_auth = f"https://{username}:{token}@github.com/agentic-digital-twins/python-jupyter.git"
!git remote set-url origin {remote_with_auth}

# Push to GitHub
!git push -u origin main  # or 'main' depending on your default branch

Reinitialized existing Git repository in C:/Users/buffa/.git/


The file will have its original line endings in your working directory


[master efc0f56] Add Jupyter notebook with Fibonacci function
 1 file changed, 10 insertions(+), 99 deletions(-)


error: remote origin already exists.


Enter your GitHub username for this specific account:


 buffaloo


Enter your GitHub personal access token (will be hidden):


 ········


error: src refspec main does not match any
error: src refspec # does not match any
error: src refspec or does not match any
error: src refspec 'main' does not match any
error: src refspec depending does not match any
error: src refspec on does not match any
error: src refspec your does not match any
error: src refspec default does not match any
error: src refspec branch does not match any
error: failed to push some refs to 'https://github.com/agentic-digital-twins/python-jupyter.git'
