In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import pickle

In [None]:
# Root folder of the FMP export. Both CSVs live underneath it, so pointing the
# notebook at another copy of the data only requires changing this constant.
# NOTE(review): Drive is mounted at '/content/drive', but this root is not
# under that mount point - confirm the path resolves in the target runtime.
DATA_DIR = '/FMP data'

# Daily close prices; the first CSV column is used as the row index.
company_data = pd.read_csv(f'{DATA_DIR}/Stocks/Daily Prices/close.csv', index_col=0)
commodity_data = pd.read_csv(f'{DATA_DIR}/Commodities/Daily Prices/close.csv', index_col=0)

In [None]:
# Transpose the data to have dates as rows (and tickers/symbols as columns).
# NOTE: this reassigns the same names, so re-running the cell transposes back.
company_data = company_data.transpose()
commodity_data = commodity_data.transpose()

# Convert the row index (the dates after the transpose) to datetime
company_data.index = pd.to_datetime(company_data.index)
commodity_data.index = pd.to_datetime(commodity_data.index)

# Remove companies and commodities with at least 50% of the time stamps missing.
# Each frame gets its own cut-off derived from its own number of rows, so the
# commodity filter no longer depends on how many dates the company frame has.
max_missing_fraction = 0.5
threshold = max_missing_fraction * len(company_data)  # kept under its original name
commodity_threshold = max_missing_fraction * len(commodity_data)
company_data = company_data.loc[:, company_data.isnull().sum() < threshold]
commodity_data = commodity_data.loc[:, commodity_data.isnull().sum() < commodity_threshold]

In [None]:
def _fill_chronologically(prices):
    """Fill missing prices in date order while keeping the original row order.

    Rows are visited from the earliest to the latest index value: each gap is
    first filled with the most recent earlier value (forward fill), and any
    gaps left at the start of the history are then filled with the first
    later value (backward fill). The returned frame has the same rows, in the
    same order, as ``prices``.

    Filling in plain row order is only correct when the index is ascending;
    on a descending index ``ffill`` would copy a later day's price into an
    earlier date.
    """
    # Positions of the rows in ascending index order (stable for equal labels).
    chronological = np.argsort(prices.index.values, kind='stable')
    # Inverse permutation: where each original row ended up after sorting.
    restore = np.empty_like(chronological)
    restore[chronological] = np.arange(len(chronological))
    filled = prices.iloc[chronological].ffill().bfill()
    return filled.iloc[restore]


# Filter out non-working days where all values are missing
company_data = company_data.dropna(how='all')
commodity_data = commodity_data.dropna(how='all')

# Print the number of cases with missing values after filtering
print(f"Number of cases with missing values in company data after filtering: {company_data.isnull().sum().sum()}")
print(f"Number of cases with missing values in commodity data after filtering: {commodity_data.isnull().sum().sum()}")

# Handle missing values on working days by forward filling then backward
# filling along the time axis (independent of how the rows are ordered)
company_data = _fill_chronologically(company_data)
commodity_data = _fill_chronologically(commodity_data)

Number of cases with missing values in company data after filtering: 411
Number of cases with missing values in commodity data after filtering: 88


In [None]:
# Display the cleaned company close-price frame (dates as rows, tickers as columns).
company_data

Ticker,NEE,BEP,FSLR,SEDG,RUN,AMD,NVDA,TXN,AVGO,ADI,...,AA,WPM,TSLA,F,GM,LCID,ALB,HON,ENS,LTHM
2024-06-12,72.26,25.66,300.71,44.67,14.65,160.24,125.20,198.98,1495.51,238.44,...,39.71,53.52,177.20,12.08,48.86,2.65,112.65,208.50,109.59,5.12
2024-06-11,72.74,26.15,294.53,45.89,14.55,158.96,120.91,197.44,1461.03,236.30,...,40.21,53.33,170.66,12.11,48.21,2.66,113.97,209.48,106.38,5.12
2024-06-10,76.97,27.06,279.80,46.89,14.15,160.34,121.79,197.62,1440.47,235.37,...,41.16,53.81,173.79,12.38,47.57,2.73,114.06,210.74,105.44,5.12
2024-06-07,75.39,26.63,267.34,46.29,13.45,167.87,120.89,195.61,1406.64,234.77,...,40.91,52.59,177.48,12.15,45.72,2.73,114.94,208.78,104.29,5.12
2024-06-06,76.70,27.81,273.45,48.32,14.44,166.78,121.00,196.24,1401.27,237.41,...,42.65,55.59,177.94,12.07,45.61,2.79,118.18,208.45,106.28,5.12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-06-21,51.89,18.36,62.91,59.40,18.52,29.10,3.79,112.32,273.99,113.67,...,22.60,24.15,14.79,9.99,36.92,9.89,68.34,174.31,65.75,6.97
2019-06-20,51.60,18.38,63.04,59.68,18.75,30.01,3.85,112.71,279.05,112.56,...,22.51,24.43,14.64,10.04,36.96,9.89,69.49,176.29,65.39,6.83
2019-06-19,51.56,18.26,62.78,59.20,18.26,30.50,3.83,111.30,277.49,111.04,...,22.40,23.70,15.10,10.04,36.78,9.89,68.94,175.41,63.99,7.09
2019-06-18,51.36,18.11,62.87,58.56,18.27,30.45,3.82,111.04,278.05,109.94,...,22.54,23.65,14.98,10.10,36.70,9.89,70.77,175.75,63.22,7.01


In [None]:
# Integer id for every symbol, assigned by position in the list below
# (first symbol -> 0, last symbol -> 29).
nodes_dict = {
    symbol: node_id
    for node_id, symbol in enumerate([
        'NEE', 'BEP', 'FSLR', 'SEDG', 'RUN', 'AMD', 'NVDA', 'TXN', 'AVGO', 'ADI',
        'FCX', 'SQM', 'AA', 'WPM', 'TSLA', 'F', 'GM', 'LCID', 'ALB', 'HON',
        'ENS', 'LTHM', 'GCUSD', 'SIUSD', 'HGUSD', 'PLUSD', 'ALIUSD', 'CLUSD',
        'NGUSD', 'PAUSD',
    ])
}

# Inverse lookup: integer id -> symbol.
reversed_node_dict = dict(zip(nodes_dict.values(), nodes_dict.keys()))


In [None]:
basic_info = {
    'NEE': 'NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear,natural gas, and other clean energy. It also develops, constructs, and operates long-term contracted assets that consists of clean energy solutions, such as renewable generation facilities, battery storage projects, and electric transmission facilities; sells energy commodities; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. The company had approximately 33,276 megawatts of net generating capacity; approximately 90,000 circuit miles of transmission and distribution lines; and 883 substations. It serves approximately 12 million people through approximately 5.9 million customer accounts in the east and lower west coasts of Florida. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida.',
    'BEP': 'Brookfield Renewable Partners L.P. owns a portfolio of renewable power generating facilities primarily in North America, Colombia, and Brazil. The company generates electricity through hydroelectric, wind, solar, distributed generation, and pumped storage, as well as renewable natural gas, carbon capture and storage, recycling, cogeneration biomass, nuclear services, and power transformation. Brookfield Renewable Partners Limited operates as the general partner of Brookfield Renewable Partners L.P. The company was formerly known as Brookfield Renewable Energy Partners L.P. and changed its name to Brookfield Renewable Partners L.P. in May 2016. Brookfield Renewable Partners L.P. was founded in 1999 and is based in Toronto, Canada. Brookfield Renewable Partners L.P operates as a subsidiary of Brookfield Corporation.',
    'FSLR': "First Solar, Inc., a solar technology company, provides photovoltaic (PV) solar energy solutions in the United States, France, Japan, Chile, and internationally. The company manufactures and sells PV solar modules with a thin film semiconductor technology that provides a lower-carbon alternative to conventional crystalline silicon PV solar modules. It designs, manufactures, and sells cadmium telluride solar modules that converts sunlight into electricity. The company's residual business operations include project development activities, operations and maintenance services, and the sale of PV solar power systems to third-party customers. It serves developers and operators of systems, utilities, independent power producers, commercial and industrial companies, and other system owners. The company was formerly known as First Solar Holdings, Inc. and changed its name to First Solar, Inc. in 2006. First Solar, Inc. was founded in 1999 and is headquartered in Tempe, Arizona.",
    'SEDG': 'SolarEdge Technologies, Inc., together with its subsidiaries, designs, develops, manufactures, and sells direct current (DC) optimized inverter systems for solar photovoltaic (PV) installations in the United States, Germany, the Netherlands, Italy, rest of Europe, and internationally. It operates in two segments, Solar and Energy Storage. The Solar segment offers power optimizers, inverters, batteries, storage solutions, electric vehicle chargers, smart tracking solutions, and smart energy management software products; Monitoring platform, a cloud-based monitoring platform, which collects power, voltage, current, and system data sent from inverters and power optimizers; and MySolarEdge app, that enables system owners to track their real-time system production and household energy consumption. This segment also provides Designer platform, an web-based tool that helps solar professionals to plan, build, and validate residential and commercial systems; Mapper application for registering the physical layout of new PV sites installed with DC optimized inverter systems; SetApp application that activates and configurate inverters; and grid services. The Energy Storage segment provides lithium-ion cells and containerized battery systems (BESS) solutions for commercial, industrial, and utility markets; modules and racks; purpose-built components and solutions, and hardware and software tools; and pre and post sales engineering support for designing, building, and managing battery and system solutions. The company offers e-mobility products, automated machines, and UPS products; and pre-sales support, ongoing trainings, and technical support and after installation services. It sells its products through solar installers and distributors, electrical equipment wholesalers, and PV module manufacturers, as well as engineering, procurement, and construction firms. SolarEdge Technologies, Inc. was incorporated in 2006 and is headquartered in Herzliya, Israel.',
    'RUN': "Sunrun Inc. designs, develops, installs, sells, owns, and maintains residential solar energy systems in the United States. It also sells solar energy systems and products, such as panels and racking; and solar leads generated to customers. In addition, the company offers battery storage along with solar energy systems; and sells services to commercial developers through multi-family and new homes. Its primary customers are residential homeowners. The company markets and sells its products through direct-to-consumer approach across online, retail, mass media, digital media, canvassing, field marketing, and referral channels, as well as its partner network. Sunrun Inc. was founded in 2007 and is headquartered in San Francisco, California.",
    'AMD': "Advanced Micro Devices, Inc. operates as a semiconductor company worldwide. It operates through Data Center, Client, Gaming, and Embedded segments. The company offers x86 microprocessors and graphics processing units (GPUs) as an accelerated processing unit, chipsets, data center, and professional GPUs; and embedded processors, and semi-custom system-on-chip (SoC) products, microprocessor and SoC development services and technology, data processing unites, field programmable gate arrays (FPGA), and adaptive SoC products. It provides processors under the AMD Ryzen, AMD Ryzen PRO, Ryzen Threadripper, Ryzen Threadripper PRO, AMD Athlon, AMD Athlon PRO, and AMD PRO A-Series brand names; graphics under the AMD Radeon graphics and AMD Embedded Radeon graphics; and professional graphics under the AMD Radeon Pro graphics brand name. In addition, the company offers data center graphics under the Radeon Instinct and Radeon PRO V-series brands, as well as servers under the AMD Instinct accelerators brand; server microprocessors under the AMD EPYC brands; low power solutions under the AMD Athlon, AMD Geode, AMD Ryzen, AMD EPYC, AMD R-Series, and G-Series brands; FPGA products under the Virtex-6, Virtex-7, Virtex UltraScale+, Kintex-7, Kintex UltraScale, Kintex UltraScale+, Artix-7, Artix UltraScale+, Spartan-6, and Spartan-7 brands; adaptive SOCs under the Zynq-7000, Zynq UltraScale+ MPSoC, Zynq UltraScale+ RFSoCs, Versal HBM, Versal Premium, Versal Prime, Versal AI Core, Versal AI Edge, Vitis, and Vivado brands; and compute and network acceleration board products under the Alveo brand. It serves original equipment and design manufacturers, public cloud service providers, system integrators, independent distributors, and add-in-board manufacturers through its direct sales force, and sales representatives. Advanced Micro Devices, Inc. was incorporated in 1969 and is headquartered in Santa Clara, California.",
    'NVDA': "NVIDIA Corporation provides graphics and compute and networking solutions in the United States, Taiwan, China, Hong Kong, and internationally. The Graphics segment offers GeForce GPUs for gaming and PCs, the GeForce NOW game streaming service and related infrastructure, and solutions for gaming platforms; Quadro/NVIDIA RTX GPUs for enterprise workstation graphics; virtual GPU or vGPU software for cloud-based visual and virtual computing; automotive platforms for infotainment systems; and Omniverse software for building and operating metaverse and 3D internet applications. The Compute & Networking segment comprises Data Center computing platforms and end-to-end networking platforms, including Quantum for InfiniBand and Spectrum for Ethernet; NVIDIA DRIVE automated-driving platform and automotive development agreements; Jetson robotics and other embedded platforms; NVIDIA AI Enterprise and other software; and DGX Cloud software and services. The company's products are used in gaming, professional visualization, data center, and automotive markets. It sells its products to original equipment manufacturers, original device manufacturers, system integrators and distributors, independent software vendors, cloud service providers, consumer internet companies, add-in board manufacturers, distributors, automotive manufacturers and tier-1 automotive suppliers, and other ecosystem participants. NVIDIA Corporation was incorporated in 1993 and is headquartered in Santa Clara, California.",
    'TXN': "Texas Instruments Incorporated designs, manufactures, and sells semiconductors to electronics designers and manufacturers in the United States and internationally. The company operates through Analog and Embedded Processing segments. The Analog segment offers power products to manage power requirements across various voltage levels, including battery-management solutions, DC/DC switching regulators, AC/DC and isolated controllers and converters, power switches, linear regulators, voltage references, and lighting products. This segment provides signal chain products that sense, condition, and measure signals to allow information to be transferred or converted for further processing and control, including amplifiers, data converters, interface products, motor drives, clocks, and logic and sensing products. The Embedded Processing segment offers microcontrollers that are used in electronic equipment; digital signal processors for mathematical computations; and applications processors for specific computing activity. This segment offers products for use in various markets, such as industrial, automotive, personal electronics, communications equipment, enterprise systems, and calculators and other. It provides DLP products primarily for use in project high-definition images; calculators; and application-specific integrated circuits. The company markets and sells its semiconductor products through direct sales and distributors, as well as through its website. Texas Instruments Incorporated was founded in 1930 and is headquartered in Dallas, Texas.",
    'AVGO': "Broadcom Inc. designs, develops, and supplies various semiconductor devices with a focus on complex digital and mixed signal complementary metal oxide semiconductor based devices and analog III-V based products worldwide. The company operates in two segments, Semiconductor Solutions and Infrastructure Software. It provides set-top box system-on-chips (SoCs); cable, digital subscriber line, and passive optical networking central office/consumer premise equipment SoCs; wireless local area network access point SoCs; Ethernet switching and routing custom silicon solutions; serializer/deserializer application specific integrated circuits; optical and copper, and physical layer devices; and fiber optic components and RF semiconductor devices. The company also offers RF front end modules and filter; Wi-Fi, Bluetooth, and global positioning system/global navigation satellite system SoCs; custom touch controllers; inductive charging; attached small computer system interface, and redundant array of independent disks controllers and adapters; peripheral component interconnect express; fiber channel host bus adapters; read channel based SoCs; custom flash controllers; preamplifiers; optocouplers, industrial fiber optics, and motion control encoders and subsystems; light emitting diode, ethernet PHYs, switch ICs, and camera microcontrollers. Its products are used in various applications, including enterprise and data center networking, home connectivity, set-top boxes, broadband access, telecommunication equipment, smartphones and base stations, data center servers and storage systems, factory automation, power generation and alternative energy systems, and electronic displays. Broadcom Inc. was founded in 1961 and is headquartered in Palo Alto, California.",
    'ADI': "Analog Devices, Inc. designs, manufactures, tests, and markets integrated circuits (ICs), software, and subsystems products in the United States, rest of North and South America, Europe, Japan, China, and rest of Asia. The company provides data converter products, which translate real-world analog signals into digital data, as well as translates digital data into analog signals; power management and reference products for power conversion, driver monitoring, sequencing, and energy management applications in the automotive, communications, industrial, and consumer markets; and power ICs that include performance, integration, and software design simulation tools for accurate power supply designs. It also offers amplifiers to condition analog signals; and radio frequency and microwave ICs to support cellular infrastructure; and micro-electro-mechanical systems technology solutions, including accelerometers used to sense acceleration, gyroscopes for sense rotation, inertial measurement units to sense multiple degrees of freedom, and broadband switches for radio and instrument systems, as well as isolators. In addition, the company provides digital signal processing and system products for numeric calculations. It serves clients in the industrial, automotive, consumer, instrumentation, aerospace, defense and healthcare, and communications markets through a direct sales force, third-party distributors, and independent sales representatives, as well as online. Analog Devices, Inc. was incorporated in 1965 and is headquartered in Wilmington, Massachusetts.",
    'FCX': "Freeport-McMoRan Inc. engages in the mining of mineral properties in North America, South America, and Indonesia. It primarily explores for copper, gold, molybdenum, silver, and other metals. The company's assets include the Grasberg minerals district in Indonesia; Morenci, Bagdad, Safford, Sierrita, and Miami in Arizona; Chino and Tyrone in New Mexico; and Henderson and Climax in Colorado, North America, as well as Cerro Verde in Peru and El Abra in Chile. The company was formerly known as Freeport-McMoRan Copper & Gold Inc. and changed its name to Freeport-McMoRan Inc. in July 2014. Freeport-McMoRan Inc. was incorporated in 1987 and is headquartered in Phoenix, Arizona.",
    'SQM': "Sociedad Química y Minera de Chile S.A. operates as a mining company worldwide. The company offers specialty plant nutrients, including sodium potassium nitrate, specialty blends, and other specialty fertilizers under Ultrasol, Qrop, Speedfol, Allganic, Ultrasoline, ProP, and Prohydric brands. It also provides iodine and its derivatives for use in medical, agricultural, industrial, and human and animal nutrition products comprising x-ray contrast media, biocides, antiseptics and disinfectants, pharmaceutical intermediates, polarizing films for LCD and LED screens, chemicals, organic compounds, and pigments, as well as added to edible salt to prevent iodine deficiency disorders. In addition, the company produces lithium carbonate and lithium hydroxide which are used in the production of cathode material for secondary batteries; lithium chloride; and basic lithium chemicals and lithium derivatives used in lubricating greases for heat-resistant glass, chips for the ceramic and glazing industry, and air conditioning chemicals, as well as other pharmaceutical syntheses and metal alloys. Further, it produces potassium sulfate; and potassium chloride which is used as a fertilizer for the application of potassium to different crops. Additionally, the company produces and markets industrial chemicals, such as sodium nitrate mainly used in the production of glass and explosives, metal processing and recycling, and production of insulating materials and adhesives; potassium nitrate used as a raw material to produce frits and special glass, as well as in the enamel, metal treatment, and pyrotechnic sectors; solar salts used as a thermal storage medium in solar power generation plants; and potassium chloride as an additive in oil drilling and food processing sectors. Furthermore, it is involved in the commercialization of third-party fertilizers; and exploration of copper, gold, and silver deposits. 
The company was founded in 1926 and is headquartered in Santiago, Chile.",
    'AA': "Alcoa Corporation, together with its subsidiaries, produces and sells bauxite, alumina, and aluminum products in the United States, Spain, Australia, Iceland, Norway, Brazil, Canada, and internationally. The company operates through two segments, Alumina and Aluminum. It engages in bauxite mining operations; and processes bauxite into alumina and sells it to customers who process it into industrial chemical products, as well as aluminum smelting and casting businesses. The company offers primary aluminum in the form of alloy ingot or value-add ingot to customers that produce products for the transportation, building and construction, packaging, wire, and other industrial markets; and flat-rolled aluminum in the form of sheet, which is sold primarily to customers that produce beverage and food cans. In addition, it owns hydro power plants that generates and sells electricity in the wholesale market to traders, large industrial consumers, distribution companies, and other generation companies. The company was formerly known as Alcoa Upstream Corporation and changed its name to Alcoa Corporation in October 2016. The company was founded in 1886 and is headquartered in Pittsburgh, Pennsylvania.",
    'WPM': "Wheaton Precious Metals Corp. primarily sells precious metals in North America, Europe, and South America. It produces and sells gold, silver, palladium, and cobalt deposits. The company was formerly known as Silver Wheaton Corp. and changed its name to Wheaton Precious Metals Corp. in May 2017. Wheaton Precious Metals Corp. was founded in 2004 and is headquartered in Vancouver, Canada.",
    'TSLA': "Tesla, Inc. designs, develops, manufactures, leases, and sells electric vehicles, and energy generation and storage systems in the United States, China, and internationally. The company operates in two segments, Automotive, and Energy Generation and Storage. The Automotive segment offers electric vehicles, as well as sells automotive regulatory credits; and non-warranty after-sales vehicle, used vehicles, body shop and parts, supercharging, retail merchandise, and vehicle insurance services. This segment also provides sedans and sport utility vehicles through direct and used vehicle sales, a network of Tesla Superchargers, and in-app upgrades; purchase financing and leasing services; services for electric vehicles through its company-owned service locations and Tesla mobile service technicians; and vehicle limited warranties and extended service plans. The Energy Generation and Storage segment engages in the design, manufacture, installation, sale, and leasing of solar energy generation and energy storage products, and related services to residential, commercial, and industrial customers and utilities through its website, stores, and galleries, as well as through a network of channel partners; and provision of service and repairs to its energy product customers, including under warranty, as well as various financing options to its solar customers. The company was formerly known as Tesla Motors, Inc. and changed its name to Tesla, Inc. in February 2017. Tesla, Inc. was incorporated in 2003 and is headquartered in Austin, Texas.",
    'F': "Ford Motor Company develops, delivers, and services a range of Ford trucks, commercial cars and vans, sport utility vehicles, and Lincoln luxury vehicles worldwide. It operates through Ford Blue, Ford Model e, and Ford Pro; Ford Next; and Ford Credit segments. The company sells Ford and Lincoln vehicles, service parts, and accessories through distributors and dealers, as well as through dealerships to commercial fleet customers, daily rental car companies, and governments. It also engages in vehicle-related financing and leasing activities to and through automotive dealers. In addition, the company provides retail installment sale contracts for new and used vehicles; and direct financing leases for new vehicles to retail and commercial customers, such as leasing companies, government entities, daily rental companies, and fleet customers. Further, it offers wholesale loans to dealers to finance the purchase of vehicle inventory; and loans to dealers to finance working capital and enhance dealership facilities, purchase dealership real estate, and other dealer vehicle programs. The company was incorporated in 1903 and is based in Dearborn, Michigan.",
    'GM': "General Motors Company designs, builds, and sells trucks, crossovers, cars, and automobile parts; and provide software-enabled services and subscriptions worldwide. The company operates through GM North America, GM International, Cruise, and GM Financial segments. It markets its vehicles primarily under the Buick, Cadillac, Chevrolet, GMC, Baojun, and Wuling brand names. In addition, the company sells trucks, crossovers, cars, and automobile parts through retail dealers, and distributors and dealers, as well as to fleet customers, including daily rental car companies, commercial fleet customers, leasing companies, and governments. Further, it offers range of after-sale services through dealer network, such as maintenance, light repairs, collision repairs, vehicle accessories, and extended service warranties. Additionally, the company provides automotive financing; and software-enabled services and subscriptions. General Motors Company was founded in 1908 and is headquartered in Detroit, Michigan.",
    'LCID': "Lucid Group, Inc. a technology company, designs, engineers, manufactures, and sells electric vehicles (EV), EV powertrains, and battery systems. It also designs and develops proprietary software in-house for Lucid vehicles. The company sells vehicles directly to consumers through its retail sales network and direct online sales, including Lucid Financial Services. Lucid Group, Inc. was founded in 2007 and is headquartered in Newark, California. Lucid Group, Inc. operates as a subsidiary of Ayar Third Investment Company.",
    'ALB': "Albemarle Corporation develops, manufactures, and markets engineered specialty chemicals worldwide. It operates through three segments: Energy Storage, Specialties and Ketjen. The Energy Storage segment offers lithium compounds, including lithium carbonate, lithium hydroxide, and lithium chloride; technical services for the handling and use of reactive lithium products; and lithium-containing by-products recycling services. The Specialties segment provides bromine-based specialty chemicals, including elemental bromine, alkyl and inorganic bromides, brominated powdered activated carbon, and other bromine fine chemicals; lithium specialties, such as butyllithium and lithium aluminum hydride; develops and manufactures cesium products for the chemical and pharmaceutical industries; and zirconium, barium, and titanium products for pyrotechnical applications that include airbag initiators. The Ketjen segment offers clean fuels technologies (CFT), which is composed of hydroprocessing catalysts (HPC) together with isomerization and akylation catalysts; fluidized catalytic cracking (FCC) catalysts and additives; and performance catalyst solutions (PCS), which is composed of organometallics and curatives. The company serves the energy storage, petroleum refining, consumer electronics, construction, automotive, lubricants, pharmaceuticals, and crop protection markets. Albemarle Corporation was founded in 1887 and is headquartered in Charlotte, North Carolina.",
    'HON': "Honeywell International Inc. engages in the aerospace technologies, building automation, energy and sustainable solutions, and industrial automation businesses in the United States, Europe, and internationally. The company's Aerospace segment offers auxiliary power units, propulsion engines, integrated avionics, environmental control and electric power systems, engine controls, flight safety, communications, navigation hardware, data and software applications, radar and surveillance systems, aircraft lighting, advanced systems and instruments, satellite and space components, and aircraft wheels and brakes; spare parts; repair, overhaul, and maintenance services; and thermal systems, as well as wireless connectivity services. Its Honeywell Building Technologies segment provides software applications for building control and optimization; sensors, switches, control systems, and instruments for energy management; access control; video surveillance; fire products; and installation, maintenance, and upgrades of systems. The company's Performance Materials and Technologies segment offers automation control, instrumentation, and software and related services; catalysts and adsorbents, equipment, and consulting; and materials to manufacture end products, such as bullet-resistant armor, nylon, computer chips, and pharmaceutical packaging, as well as provides materials based on hydrofluoro-olefin technology. Its Safety and Productivity Solutions segment provides personal protective equipment, apparel, gear, and footwear; gas detection technology; custom-engineered sensors, switches, and controls for sensing and productivity solution; cloud-based notification and emergency messaging; mobile devices and software; custom-engineered sensors, switches, and controls; and data and asset management productivity solutions. Honeywell International Inc. was founded in 1885 and is headquartered in Charlotte, North Carolina.",
    'ENS': "EnerSys engages in the provision of stored energy solutions for industrial applications worldwide. It operates in four segments: Energy Systems, Motive Power, Specialty, and New Ventures. The Energy Systems segment offers uninterruptible power systems (UPS) applications for computer and computer-controlled systems, as well as telecommunications systems; switchgear and electrical control systems used in industrial facilities and electric utilities, large-scale energy storage, and energy pipelines; integrated power solutions and services to broadband, telecom, data center, and renewable and industrial customers; and thermally managed cabinets and enclosures for electronic equipment and batteries. The Motive Power segment provides power solutions for electric industrial forklifts used in manufacturing, warehousing, and other material handling applications, as well as automated guided vehicles, mining equipment, and diesel locomotive starting and other rail equipment. The Specialty offers batteries for starting, lighting, and ignition applications in automotive and over-the-road trucks; and energy solutions for satellites, spacecraft, commercial aircraft, military land vehicles, aircraft, submarines, ships, and other tactical vehicles, as well as medical devices and equipment. The New Venture segment provides energy storage and management systems for demand charge reduction, utility back-up power, and dynamic fast charging for electric vehicles. The company also offers battery chargers, power equipment, battery accessories, and outdoor cabinet enclosures, as well as related after-market and customer-support services for industrial batteries. It sells its products through a network of distributors, independent representatives, and internal sales forces. The company was formerly known as Yuasa, Inc. and changed its name to EnerSys in January 2001. EnerSys was founded in 1991 and is headquartered in Reading, Pennsylvania.",
    'LTHM': 'Bankruptcy',
    'GCUSD': "Gold is a chemical element; it has symbol Au (from the Latin word aurum) and atomic number 79. In its pure form, it is a bright, slightly orange-yellow, dense, soft, malleable, and ductile metal. Chemically, gold is a transition metal, a group 11 element, and one of the noble metals. It is one of the least reactive chemical elements, being the second-lowest in the reactivity series. It is solid under standard conditions.",
    'SIUSD': "Silver is a chemical element; it has symbol Ag (from Latin argentum 'silver', derived from Proto-Indo-European *h₂erǵ 'shiny, white')) and atomic number 47. A soft, white, lustrous transition metal, it exhibits the highest electrical conductivity, thermal conductivity, and reflectivity of any metal.[8] The metal is found in the Earth's crust in the pure, free elemental form ('native silver'), as an alloy with gold and other metals, and in minerals such as argentite and chlorargyrite. Most silver is produced as a byproduct of copper, gold, lead, and zinc refining.",
    'HGUSD': "Copper is a chemical element; it has symbol Cu (from Latin cuprum) and atomic number 29. It is a soft, malleable, and ductile metal with very high thermal and electrical conductivity. A freshly exposed surface of pure copper has a pinkish-orange color. Copper is used as a conductor of heat and electricity, as a building material, and as a constituent of various metal alloys, such as sterling silver used in jewelry, cupronickel used to make marine hardware and coins, and constantan used in strain gauges and thermocouples for temperature measurement.",
    'PLUSD': "Platinum is a chemical element; it has symbol Pt and atomic number 78. It is a dense, malleable, ductile, highly unreactive, precious, silverish-white transition metal. Its name originates from Spanish platina, a diminutive of plata 'silver'. Platinum is a member of the platinum group of elements and group 10 of the periodic table of elements. It has six naturally occurring isotopes. It is one of the rarer elements in Earth's crust, with an average abundance of approximately 5 μg/kg. It occurs in some nickel and copper ores along with some native deposits, mostly in South Africa, which accounts for ~80% of the world production. Because of its scarcity in Earth's crust, only a few hundred tonnes are produced annually, and given its important uses, it is highly valuable and is a major precious metal commodity.",
    'ALIUSD': "Aluminium (Aluminum in North American English) is a chemical element; it has symbol Al and atomic number 13. Aluminium has a density lower than that of other common metals, about one-third that of steel. It has a great affinity towards oxygen, forming a protective layer of oxide on the surface when exposed to air. Aluminium visually resembles silver, both in its color and in its great ability to reflect light. It is soft, nonmagnetic, and ductile. It has one stable isotope, Al, which is highly abundant, making aluminium the twelfth-most common element in the universe. The radioactivity of 26Al, a more unstable isotope, leads to it being used in radiometric dating.",
    'CLUSD': "Petroleum or crude oil, also referred to as simply oil, is a naturally occurring yellowish-black liquid mixture of mainly hydrocarbons,[1] and is found in geological formations. The name petroleum covers both naturally occurring unprocessed crude oil and petroleum products that consist of refined crude oil. Petroleum is primarily recovered by oil drilling. Drilling is carried out after studies of structural geology, sedimentary basin analysis, and reservoir characterization. Unconventional reserves such as oil sands and oil shale exist.",
    'NGUSD': "Natural gas (also called fossil gas, methane gas or simply gas) is a naturally occurring mixture of gaseous hydrocarbons consisting primarily of methane (95%)[1] in addition to various smaller amounts of other higher alkanes. Traces of carbon dioxide, nitrogen, hydrogen sulfide, and helium are also usually present.[2] Methane is colorless and odorless, and the second largest greenhouse gas contributor to global climate change after carbon dioxide.[3] Because natural gas is odorless, odorizers such as mercaptan (which smells like sulfur or rotten eggs) are commonly added to it for safety so that leaks can be readily detected.",
    'PAUSD': "Palladium is a chemical element; it has symbol Pd and atomic number 46. It is a rare and lustrous silvery-white metal discovered in 1802 by the English chemist William Hyde Wollaston. He named it after the asteroid Pallas, which was itself named after the epithet of the Greek goddess Athena, acquired by her when she slew Pallas. Palladium, platinum, rhodium, ruthenium, iridium and osmium form a group of elements referred to as the platinum group metals (PGMs). They have similar chemical properties, but palladium has the lowest melting point and is the least dense of them."
}

In [None]:
# Short plain-text description of every graph node: 22 company tickers
# followed by 8 commodity symbols (30 entries in total).
# Each description is later embedded with the sentence-transformer model while
# iterating basic_info.values(), so the i-th entry here becomes row i of the
# node-feature tensor.
# NOTE(review): that row order presumably has to match the integer ids stored
# in nodes_dict / reversed_node_dict — confirm before reordering entries.
basic_info = {
    'NEE': 'NextEra Energy, Inc. generates electricity primarily from wind, solar, nuclear, and natural gas. It focuses on clean energy solutions and operates energy storage and transmission infrastructure. The raw materials required include wind, sunlight, uranium (for nuclear energy), natural gas, and transmission infrastructure like substations and transmission lines. It serves around 12 million customers in Florida, providing electricity generation and storage services.',

    'BEP': 'Brookfield Renewable Partners L.P. owns renewable power facilities, generating electricity through hydroelectric, wind, solar, and other renewable sources like renewable natural gas and carbon capture. The company uses water (for hydroelectric power), wind, and sunlight as key raw materials. It operates in North and South America and focuses on renewable energy production and power storage solutions.',

    'FSLR': 'First Solar, Inc. manufactures thin-film photovoltaic (PV) solar modules, which are made using cadmium telluride, a key raw material. These solar modules provide an alternative to traditional silicon-based PV technology, with applications in utility-scale solar energy projects. The company relies on cadmium telluride, glass, and other semiconductor materials for production, serving utility companies and developers globally.',

    'SEDG': 'SolarEdge Technologies, Inc. designs and manufactures solar inverters, power optimizers, and battery storage solutions for solar installations. Its products rely on semiconductors, lithium-ion batteries, and other electrical components. The company serves the solar and energy storage sectors, and it also provides software for energy management and monitoring.',

    'RUN': 'Sunrun Inc. develops and installs residential solar energy systems and battery storage solutions. The company uses solar panels (photovoltaic cells), racking systems, lithium-ion batteries, and electrical components. Its primary customers are homeowners, and it focuses on providing energy independence through solar and energy storage.',

    'AMD': 'Advanced Micro Devices, Inc. designs and manufactures semiconductors, processors (Ryzen, EPYC), and GPUs (Radeon) for various industries, including gaming, data centers, and cloud services. It relies on raw materials such as silicon, copper, and rare earth metals for its semiconductor manufacturing. AMD products serve original equipment manufacturers and cloud service providers worldwide.',

    'NVDA': 'NVIDIA Corporation develops GPUs for gaming, AI, and data centers, as well as hardware for automotive and robotics industries. It relies on semiconductors, silicon, rare earth metals, and high-performance computing infrastructure. NVIDIA serves the gaming industry, automotive, AI research, and cloud computing services globally.',

    'TXN': 'Texas Instruments designs and manufactures semiconductors and integrated circuits, including analog and embedded processing chips used in industries like automotive, industrial, and personal electronics. It relies on raw materials such as silicon, metals, and chemicals used in chip fabrication. The company serves a broad range of markets, including power management, signal processing, and communication systems.',

    'AVGO': 'Broadcom Inc. designs semiconductors for telecommunications, networking, and consumer electronics applications. The company relies on raw materials like silicon, copper, and rare earth metals for its semiconductor devices. It produces chips used in data centers, smartphones, broadband access, and telecommunication equipment.',

    'ADI': 'Analog Devices, Inc. designs and manufactures integrated circuits for power management, signal processing, and communication systems. It relies on raw materials such as silicon, semiconductors, and metals. The company serves industries like automotive, industrial, healthcare, and communications, providing products for power conversion and signal chain applications.',

    'FCX': 'Freeport-McMoRan Inc. is a mining company that extracts copper, gold, and molybdenum. Its key raw materials are mineral ores, and it operates large-scale mining operations in North America, South America, and Indonesia. The company’s products are used in industries like electronics, construction, and energy infrastructure.',

    'SQM': 'Sociedad Química y Minera de Chile S.A. is a leading producer of lithium, potassium, and iodine. It uses raw materials such as brine from salt flats (for lithium production) and potassium nitrate. SQM serves industries like battery manufacturing (especially for electric vehicles), agriculture (fertilizers), and pharmaceuticals (iodine-based products).',

    'AA': 'Alcoa Corporation produces and sells bauxite, alumina, and aluminum products. It operates in bauxite mining, alumina refining, and aluminum smelting. Raw materials include bauxite for alumina production and energy sources for smelting, such as hydro power from its plants. The company provides aluminum products for various industries, including transportation, construction, and packaging. It also generates electricity through hydro power for the wholesale market.',

    'WPM': 'Wheaton Precious Metals Corp. primarily sells gold, silver, palladium, and cobalt. It acquires these metals through streaming agreements, where it purchases precious metals from mining companies. Raw materials include gold, silver, palladium, and cobalt ores. The company operates across North America, Europe, and South America, and sells these precious metals to various industries and investors.',

    'TSLA': 'Tesla, Inc. designs and manufactures electric vehicles (EVs) and energy storage systems. Key raw materials include lithium, cobalt, and nickel for batteries; aluminum and steel for vehicle components; and silicon for solar panels. Tesla also manufactures energy generation products like solar panels and energy storage solutions. The company sells its products through direct and online channels and provides related services such as financing and repairs.',

    'F': 'Ford Motor Company develops and services a range of vehicles, including trucks, cars, and SUVs. Raw materials include steel, aluminum, and various components for vehicle manufacturing. Ford also engages in vehicle-related financing and leasing. It operates globally through dealerships and provides retail installment sale contracts and direct financing leases for customers and dealers.',

    'GM': 'General Motors Company designs, builds, and sells vehicles under brands like Chevrolet and Cadillac. Key raw materials include steel, aluminum, and various components for vehicle assembly. GM also offers automotive financing and software-enabled services. The company operates through retail dealers and provides vehicle maintenance, repairs, and aftermarket services.',

    'LCID': 'Lucid Group, Inc. designs and manufactures electric vehicles (EVs) and powertrains. It uses raw materials such as lithium, cobalt, and nickel for batteries; and aluminum and steel for vehicle parts. Lucid sells its vehicles directly to consumers and provides proprietary software for its vehicles. The company operates through a network of retail locations and online sales channels.',

    'ALB': 'Albemarle Corporation manufactures specialty chemicals, including lithium compounds for batteries, bromine-based chemicals, and clean fuels technologies. Key raw materials include lithium brine for lithium compounds, bromine, and various chemicals for clean fuels. The company serves markets like energy storage, pharmaceuticals, and automotive, focusing on specialty chemicals and performance materials.',

    'HON': 'Honeywell International Inc. operates in aerospace, building automation, energy solutions, and industrial automation. Raw materials include metals for aerospace components, chemicals for performance materials, and various electronic components for automation. Honeywell provides products and services in sectors like aerospace, building management, and industrial safety.',

    'ENS': 'EnerSys provides energy storage solutions, including batteries and uninterruptible power systems (UPS). Raw materials include lead, lithium, and other components for battery manufacturing. The company serves various sectors such as telecommunications, industrial applications, and energy storage, offering products like UPS systems, motive power batteries, and specialty batteries.',

    'LTHM': 'Livent Corporation (formerly LTHM) focuses on lithium compounds for energy storage. It uses lithium brine and other raw materials for producing lithium hydroxide and carbonate. The company supports industries such as electric vehicles and energy storage systems.',

    'GCUSD': 'Gold is a chemical element used in various applications, including jewelry, electronics, and investment. It is a soft, malleable metal with a bright yellow color. Gold is mined from ores and alluvial deposits, and its properties make it valuable for electronics, dentistry, and as a monetary asset.',

    'SIUSD': 'Silver is a transition metal used in electronics, jewelry, and as an investment. It has high electrical and thermal conductivity and is often produced as a byproduct of mining other metals. Silver is found in ores and is refined for various industrial applications and investment products.',

    'HGUSD': 'Copper is a highly conductive metal used in electrical wiring, plumbing, and various alloys. It is mined from ores and refined into various products. Copper is used in construction, electronics, and manufacturing due to its excellent conductivity and malleability.',

    'PLUSD': 'Platinum is a dense, unreactive metal used in catalytic converters, jewelry, and various industrial applications. It is mined from ores and is highly valued for its rarity and properties. Platinum is used in automotive catalysts, chemical processing, and as an investment asset.',

    'ALIUSD': 'Aluminium is a lightweight metal used in construction, packaging, and transportation. It is extracted from bauxite ore and processed into various forms. Aluminium is used in a wide range of applications due to its strength, low density, and resistance to corrosion.',

    'CLUSD': 'Petroleum (crude oil) is a naturally occurring liquid used for fuel, energy, and raw materials for various products. It is extracted through drilling and refined into gasoline, diesel, and other petrochemical products. Petroleum is used in transportation, heating, and as a feedstock for chemicals.',

    'NGUSD': 'Natural gas is a fossil fuel primarily composed of methane used for heating, electricity generation, and as an industrial feedstock. It is extracted through drilling and processed for various uses, including energy production and as a chemical feedstock.',

    'PAUSD': 'Palladium is a rare, precious metal used in catalytic converters, electronics, and jewelry. It is mined from ores and is valued for its properties in catalysis and electronics. Palladium is used in automotive catalysts, electronics, and as a financial investment.'
}



In [None]:
# Aliases only (no copy is made): df_companies / df_commodities refer to the
# very same DataFrame objects as company_data / commodity_data.
df_companies = company_data
df_commodities = commodity_data
# Last expression of the cell: show the company column labels.
df_companies.columns

Index(['NEE', 'BEP', 'FSLR', 'SEDG', 'RUN', 'AMD', 'NVDA', 'TXN', 'AVGO',
       'ADI', 'FCX', 'SQM', 'AA', 'WPM', 'TSLA', 'F', 'GM', 'LCID', 'ALB',
       'HON', 'ENS', 'LTHM'],
      dtype='object', name='Ticker')

In [None]:
import pandas as pd

def calculate_daily_price_change(df):
    """Return the row-over-row percentage change of every column.

    Parameters
    ----------
    df : pandas.DataFrame
        Price table with one row per date and one column per instrument.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Same columns as ``df``, rows sorted by index, each value equal to
        ``(price - previous_price) / previous_price * 100``. The first row
        has no previous row and is therefore entirely NaN.
    """
    # Changes are only meaningful between chronologically adjacent rows.
    prices = df.sort_index()

    # shift(1) lines every row up with the prices of the row before it, so the
    # whole table is computed in one vectorised expression instead of one
    # scalar .iloc access per cell. The result is a numeric frame rather than
    # the object-dtype frame produced by filling an empty DataFrame cell by cell.
    previous_prices = prices.shift(1)
    return (prices - previous_prices) / previous_prices * 100

In [None]:
# Daily percentage change of every company ticker; the first row is NaN
# because it has no previous day to compare with.
df=calculate_daily_price_change(df_companies)
df

Ticker,NEE,BEP,FSLR,SEDG,RUN,AMD,NVDA,TXN,AVGO,ADI,...,AA,WPM,TSLA,F,GM,LCID,ALB,HON,ENS,LTHM
2019-06-17,,,,,,,,,,,...,,,,,,,,,,
2019-06-18,-0.445823,0.33241,3.16705,3.48118,6.904623,4.280822,5.23416,4.047976,4.541866,4.605138,...,4.110855,1.025203,-0.133333,0.497512,1.54953,0.0,-0.659742,1.966814,1.249199,-1.544944
2019-06-19,0.389408,0.828272,-0.143153,1.092896,-0.054735,0.164204,0.26178,0.23415,-0.201403,1.000546,...,-0.621118,0.211416,0.801068,-0.594059,0.217984,0.0,-2.585841,-0.193457,1.217969,1.141227
2019-06-20,0.07758,0.657174,0.414145,0.810811,2.683461,-1.606557,0.522193,1.266846,0.562182,1.368876,...,0.491071,3.080169,-3.046358,0.0,0.489396,0.0,0.797795,0.501682,2.187842,-3.667137
2019-06-21,0.562016,-0.108814,-0.206218,-0.469169,-1.226667,-3.032323,-1.558442,-0.346021,-1.813295,0.986141,...,0.399822,-1.146132,1.02459,-0.498008,-0.108225,0.0,-1.654914,-1.123149,0.550543,2.04978
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-06,-0.45425,-0.429646,0.142826,-1.226492,-3.087248,0.367094,-1.176086,0.081599,-0.836465,0.734046,...,0.140878,3.59672,1.68,0.0,-0.02192,0.0,-0.396123,-0.391838,-0.913668,0.0
2024-06-07,-1.707953,-4.243078,-2.234412,-4.201159,-6.855956,0.653556,-0.090909,-0.321035,0.383224,-1.112,...,-4.079719,-5.396654,-0.258514,0.6628,0.241175,-2.150538,-2.741581,0.158311,-1.872412,0.0
2024-06-10,2.095769,1.61472,4.660732,1.296176,5.204461,-4.485614,0.744478,1.027555,2.405022,0.255569,...,0.611098,2.319833,-2.079108,1.893004,4.046369,0.0,-0.765617,0.938787,1.102694,0.0
2024-06-11,-5.495648,-3.362897,5.264475,-2.132651,2.826855,-0.860671,-0.722555,-0.091084,1.427312,0.395123,...,-2.308066,-0.892028,-1.801024,-2.180937,1.345386,-2.564103,-0.078906,-0.597893,0.891502,0.0


In [None]:
Y = 5  # Threshold in percent: a daily move is significant when abs(change) >= Y
LAG = 1  # Lag between source and destination move, in calendar days (used as pd.Timedelta(days=LAG))



def get_ext_roll(date, date_ranges):
    """Return the label of the first range in ``date_ranges`` containing ``date``.

    ``date_ranges`` maps a label to a ``(start, end)`` pair; both bounds are
    inclusive. Ranges are tried in the mapping's iteration order and the
    first match wins. ``None`` is returned when no range contains ``date``.
    """
    matching_labels = (
        label
        for label, (range_start, range_end) in date_ranges.items()
        if range_start <= date <= range_end
    )
    return next(matching_labels, None)

def create_relationships(df_commodities, df_companies, nodes_dict, Y, LAG):
    """Build the temporal edge list linking large moves that follow each other.

    An edge ``src -> dst`` stamped with ``date`` is emitted when column
    ``src`` moved by at least ``Y`` percent (in absolute value) on ``date``
    and a different column ``dst`` moved by at least ``Y`` percent in the same
    direction ``LAG`` calendar days later.

    Parameters
    ----------
    df_commodities, df_companies : pandas.DataFrame
        Price tables (rows = dates, columns = instruments). Their daily
        percentage changes are computed with ``calculate_daily_price_change``.
    nodes_dict : mapping
        Column label -> value written to the ``src`` / ``dst`` columns.
    Y : number
        Threshold on the absolute daily percentage change.
    LAG : int
        Offset between the source move and the destination move, in
        calendar days.

    Returns
    -------
    pandas.DataFrame
        Columns ``src``, ``dst``, ``time`` (date of the source move) and
        ``ext_roll`` (0 = first 80% of the rows, 1 = next 10%, 2 = rest).

    Raises
    ------
    IndexError
        If the combined frame has too few rows to place the three split
        boundaries.
    """
    # Fractions of the timeline at which the train and the validation periods
    # end; everything after the validation period is the test period.
    train_end_fraction = 0.8
    val_end_fraction = 0.9

    # One frame holding the daily percentage change of every instrument.
    df_combined = pd.concat(
        [
            calculate_daily_price_change(df_commodities),
            calculate_daily_price_change(df_companies),
        ],
        axis=1,
    )
    dates = df_combined.index
    labels = list(df_combined.columns)

    def day_at(position):
        # Calendar day (midnight timestamp) of the row at ``position``.
        return pd.Timestamp(dates[position]).normalize()

    train_data_last_index = int(len(df_combined) * train_end_fraction)
    val_data_last_index = int(len(df_combined) * val_end_fraction)

    # Split boundaries are read from the data itself, including the very first
    # day, so the function also works for tables that start on another date.
    train_data_last_date = day_at(train_data_last_index)
    date_ranges = {
        0: (day_at(0), train_data_last_date),
        1: (day_at(train_data_last_index + 1), day_at(val_data_last_index)),
        2: (day_at(val_data_last_index + 1), day_at(-1)),
    }

    print(train_data_last_date.strftime('%Y-%m-%d'))

    # Threshold test for the whole table at once. NaN compares False, so the
    # undefined first row and any gaps never count as significant moves.
    changes = df_combined.to_numpy(dtype=float)
    is_significant = np.abs(changes) >= Y

    # NOTE(review): the lag is counted in calendar days. Rows for non-trading
    # days appear to be dropped upstream, so with LAG = 1 a Friday move can
    # never be paired with the following Monday. Confirm whether the lag
    # should be counted in rows (trading days) instead.
    lag = pd.Timedelta(days=LAG)

    relationships = []
    for row_pos, date in enumerate(dates):
        ext_roll = get_ext_roll(date, date_ranges)
        if ext_roll is None:
            continue

        src_positions = np.flatnonzero(is_significant[row_pos])
        if src_positions.size == 0:
            continue

        lagged_date = date + lag
        if lagged_date not in dates:
            continue
        lagged_pos = dates.get_loc(lagged_date)
        dst_positions = np.flatnonzero(is_significant[lagged_pos])

        for src_pos in src_positions:
            src = labels[src_pos]
            src_index = nodes_dict[src]
            src_change = changes[row_pos, src_pos]
            for dst_pos in dst_positions:
                dst = labels[dst_pos]
                # No self-loops, and both moves must point in the same
                # direction (product of the two changes is positive).
                if src != dst and changes[lagged_pos, dst_pos] * src_change > 0:
                    relationships.append([src_index, nodes_dict[dst], date, ext_roll])

    # Create the new dataframe
    relationships_df = pd.DataFrame(relationships, columns=['src', 'dst', 'time', 'ext_roll'])
    return relationships_df

# Build the edge list for the configured threshold (Y) and lag (LAG).
relationships_df = create_relationships(df_commodities, df_companies, nodes_dict, Y, LAG)

# Last expression of the cell: display the resulting edge list.
relationships_df

2023-06-19


Unnamed: 0,src,dst,time,ext_roll
0,6,26,2019-06-26,0
1,4,7,2019-07-23,0
2,7,14,2019-07-24,0
3,7,15,2019-07-24,0
4,5,27,2019-07-31,0
...,...,...,...,...
9435,28,6,2024-06-04,2
9436,28,8,2024-06-04,2
9437,4,28,2024-06-10,2
9438,4,0,2024-06-10,2


In [None]:
# Keep only the edges that go from node 28 to node 8.
filtered_rows = relationships_df.loc[
    relationships_df['src'].eq(28) & relationships_df['dst'].eq(8)
]

# Print the matching edges below a header line.
print(f"Filtered rows where src = 28 and dst = 8:\n", filtered_rows)

Filtered rows where src = 28 and dst = 8:
       src  dst       time  ext_roll
531    28    8 2020-03-10         0
1958   28    8 2020-03-18         0
8171   28    8 2022-11-29         0
8278   28    8 2023-01-05         0
9115   28    8 2024-01-18         2
9202   28    8 2024-02-21         2
9253   28    8 2024-03-07         2
9436   28    8 2024-06-04         2


In [None]:
def find_mode_values(df, top_n=5):
    """Print the most frequent ``src`` values, ``dst`` values and (src, dst) pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge list with ``src`` and ``dst`` columns.
    top_n : int, default 5
        Number of most frequent entries printed in each of the three sections.

    Returns
    -------
    None
        The rankings are printed; nothing is returned.
    """
    # Frequencies of the individual endpoints.
    mode_src_count = df['src'].value_counts().head(top_n)
    mode_dst_count = df['dst'].value_counts().head(top_n)

    # Frequencies of the ordered pair (src, dst).
    mode_src_dst_count = df.groupby(['src', 'dst']).size().sort_values(ascending=False).head(top_n)

    # The headers state the number of rows actually requested (they used to
    # say "Top 3" while five rows were printed).
    print(f"Top {top_n} mode values for 'src':")
    print(mode_src_count)

    print(f"\nTop {top_n} mode values for 'dst':")
    print(mode_dst_count)

    print(f"\nTop {top_n} mode values for the pair (src, dst):")
    print(mode_src_dst_count)

# Print the frequency rankings for the edge list.
find_mode_values(relationships_df)

Top 3 mode values for 'src':
src
4     705
21    624
3     565
14    559
17    529
Name: count, dtype: int64

Top 3 mode values for 'dst':
dst
4     803
21    582
3     577
28    557
12    550
Name: count, dtype: int64

Top 3 mode values for the pair (src, dst):
src  dst
17   4      76
28   4      73
3    4      64
21   4      61
4    3      59
dtype: int64


In [None]:
def check_node_usage(df, all_nodes=None):
    """Print which nodes never appear as source and/or destination of an edge.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge list with ``src`` and ``dst`` columns.
    all_nodes : iterable, optional
        Complete collection of node ids to check. When omitted, the nodes are
        taken from ``df`` itself; in that case a node can be missing from one
        role but, by construction, never from both.

    Returns
    -------
    None
        The three findings are printed; nothing is returned.
    """
    src_nodes = set(df['src'])
    dst_nodes = set(df['dst'])

    # Universe of nodes: supplied by the caller, or everything seen in df.
    if all_nodes is None:
        all_nodes = src_nodes | dst_nodes
    else:
        all_nodes = set(all_nodes)

    # Nodes that are never src
    never_src = all_nodes - src_nodes

    # Nodes that are never dst
    never_dst = all_nodes - dst_nodes

    # A node is unused only if it is missing from BOTH roles, hence the
    # intersection (a union would also list nodes missing from just one role).
    never_src_or_dst = never_src & never_dst

    # Print results
    if never_src:
        print(f"Nodes that are never used as src: {never_src}")
    else:
        print("All nodes are used as src.")

    if never_dst:
        print(f"Nodes that are never used as dst: {never_dst}")
    else:
        print("All nodes are used as dst.")

    if never_src_or_dst:
        print(f"Nodes that are never used as either src or dst: {never_src_or_dst}")
    else:
        print("All nodes are used as either src or dst.")

# Report node coverage for the edge list held in relationships_df.
check_node_usage(relationships_df)

All nodes are used as src.
All nodes are used as dst.
All nodes are used as either src or dst.


In [None]:
# Path of the pickled feature table.
# NOTE(review): absolute path — presumably depends on the mounted Drive
# layout; confirm it resolves in a fresh runtime.
pickle_path = '/Data/temporal_features_for_edges_dict_percent.pkl'

# Load the dictionary from the pickle file. It is indexed further down as
# features_dict[ticker][date_str][position].
# SECURITY: pickle.load can execute arbitrary code while loading; only open
# pickle files that come from a trusted source.
with open(pickle_path, 'rb') as file:
    features_dict = pickle.load(file)

In [None]:
def get_node_features(ticker, date_str, features=None):
    """Return the price feature of ``ticker`` on ``date_str`` as a one-element list.

    Parameters
    ----------
    ticker : hashable
        Node name used as first-level key of the feature table.
    date_str : str
        Date key of the second level (the callers in this notebook format it
        as ``'%Y-%m-%d'``).
    features : mapping, optional
        Feature table shaped ``{ticker: {date_str: sequence}}``. Defaults to
        the notebook-level ``features_dict``.

    Returns
    -------
    list
        ``[sequence[7]]`` when both keys exist, otherwise ``[0]``.
        NOTE(review): a missing entry is indistinguishable from a genuine
        value of 0 — confirm this fallback is intended.
    """
    # Position of the "price" entry inside a per-date feature sequence.
    price_position = 7

    if features is None:
        features = features_dict

    if ticker in features and date_str in features[ticker]:
        # To use every feature instead of the price only, presumably return
        # features[ticker][date_str] here and [0] * 14 as the fallback below.
        return [features[ticker][date_str][price_position]]
    return [0]  # fallback for the price-only variant

# One feature vector per edge, in edge order: the source node's features
# followed by the destination node's features, both looked up on the edge's
# 'time' date.
# NOTE(review): 'time' is the date of the source move; the destination move
# that produced the edge happened LAG days later — confirm that reading the
# destination features on the source date is intended.
edge_features = [
    get_node_features(reversed_node_dict[src_id], edge_time.strftime('%Y-%m-%d'))
    + get_node_features(reversed_node_dict[dst_id], edge_time.strftime('%Y-%m-%d'))
    for src_id, dst_id, edge_time in zip(
        relationships_df['src'], relationships_df['dst'], relationships_df['time']
    )
]

In [None]:
# Spot-check one edge: [source feature, destination feature].
edge_features[19]

[19.858156028368796, 6.730769230769241]

In [None]:
# Replace every edge timestamp by the number of days elapsed since the
# earliest edge. The dtype guard makes the cell safe to re-run: once the
# column holds integers, `.dt` would raise AttributeError.
# Cells that still need real timestamps (the edge-feature lookup calls
# strftime on this column) have to run before this one.
if pd.api.types.is_datetime64_any_dtype(relationships_df['time']):
    first_date = relationships_df['time'].min()
    relationships_df['time'] = (relationships_df['time'] - first_date).dt.days


In [None]:
# Optional rescaling of the day offsets, currently disabled:
#relationships_df['time'] = relationships_df['time'] * 0.1
relationships_df

Unnamed: 0,src,dst,time,ext_roll
0,6,26,0,0
1,4,7,27,0
2,7,14,28,0
3,7,15,28,0
4,5,27,35,0
...,...,...,...,...
9435,28,6,1805,2
9436,28,8,1805,2
9437,4,28,1811,2
9438,4,0,1811,2


In [None]:
# Edges labelled ext_roll == 2, i.e. those falling in the last period of the split.
relationships_df.query('ext_roll == 2')

Unnamed: 0,src,dst,time,ext_roll
9059,3,4,1637,2
9060,3,12,1637,2
9061,3,17,1637,2
9062,3,21,1637,2
9063,4,3,1637,2
...,...,...,...,...
9435,28,6,1805,2
9436,28,8,1805,2
9437,4,28,1811,2
9438,4,0,1811,2


In [None]:
!pip install -U sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-3.3.0-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.3.0-py3-none-any.whl (268 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.7/268.7 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
  Attempting uninstall: sentence-transformers
    Found existing installation: sentence-transformers 3.2.1
    Uninstalling sentence-transformers-3.2.1:
      Successfully uninstalled sentence-transformers-3.2.1
Successfully installed sentence-transformers-3.3.0


In [None]:
from sentence_transformers import SentenceTransformer
# Pretrained sentence-embedding model used to turn the textual node
# descriptions into fixed-size feature vectors.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
import torch

# One embedding per node description, in basic_info iteration order, so the
# i-th entry of basic_info yields the i-th element of node_features.
node_features = [model.encode(description) for description in basic_info.values()]

In [None]:
# Stack the per-node embeddings into a single ndarray first: torch.tensor()
# on a list of ndarrays converts element by element and emits the
# "extremely slow" UserWarning.
tensor = torch.tensor(np.array(node_features))
tensor_edge = torch.tensor(edge_features)

  tensor = torch.tensor(node_features)


In [None]:
# Bring the edge features to float32 whenever their dtype differs from the
# dtype of the node features; report which case applied.
if tensor.dtype == tensor_edge.dtype:
    print("Data types are the same.")
else:
    print("Data types are different. Converting tensor_edge to float32.")
    tensor_edge = tensor_edge.float()

Data types are different. Converting tensor_edge to float32.


In [None]:
# (number of nodes, embedding size)
tensor.shape

torch.Size([30, 384])

In [None]:
# (number of edges, 2): one source and one destination feature per edge
tensor_edge.shape

torch.Size([9440, 2])

In [None]:
# Persist both feature tensors. The paths are relative, so the files are
# written to the kernel's current working directory.
torch.save(tensor, 'node_features.pt')
torch.save(tensor_edge, 'edge_features.pt')

In [None]:
# Export the edge list (src, dst, time, ext_roll). to_csv also writes the
# DataFrame index by default, which shows up as an unnamed first column when
# the file is read back.
relationships_df.to_csv('edges.csv')