## Importing Data: Working with CSV Files

In [1]:
import pandas as pd

# Read 'model_logs.csv' (path relative to the working directory) into the DataFrame `df`
df = pd.read_csv(filepath_or_buffer='model_logs.csv')

In [4]:
# Preview the first five rows (the head() default) to sanity-check the load
df.head()

Unnamed: 0,Date,Prompt,Response Time (ms),Tokens Generated
0,2023-01-01,Generate a creative story about space travel,62,287
1,2023-01-02,Write a sci-fi short story set in 2050,45,361
2,2023-01-03,Write a poem about the future of technology,33,221
3,2023-01-04,Explain quantum computing in simple terms,53,290
4,2023-01-05,Explain quantum computing in simple terms,75,392


In [4]:
# Print a structural summary: column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Date                100 non-null    object
 1   Prompt              100 non-null    object
 2   Response Time (ms)  100 non-null    int64 
 3   Tokens Generated    100 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 3.3+ KB


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Response Time (ms),Tokens Generated
count,100.0,100.0
mean,66.65,342.93
std,21.114108,90.095264
min,30.0,203.0
25%,47.5,274.75
50%,70.0,324.5
75%,86.0,427.0
max,99.0,500.0


In [6]:
# 'Date' was loaded as plain strings (dtype object); parse it into pandas
# datetimes and overwrite the column in place
df['Date'] = pd.to_datetime(df['Date'])

In [7]:
# Check the column dtypes: 'Date' should now be datetime64[ns] instead of object
df.dtypes

Date                  datetime64[ns]
Prompt                        object
Response Time (ms)             int64
Tokens Generated               int64
dtype: object

In [8]:
# Re-read the CSV, asking pandas to parse the 'Date' column as datetimes at load time
df = pd.read_csv(
    filepath_or_buffer='model_logs.csv',
    parse_dates=['Date'],
)

In [9]:
# Check the column dtypes of the freshly loaded frame: 'Date' should be datetime64[ns]
df.dtypes

Date                  datetime64[ns]
Prompt                        object
Response Time (ms)             int64
Tokens Generated               int64
dtype: object

In [10]:
# Boolean Series: True for every row whose response time exceeds 50 ms
mask = df['Response Time (ms)'].gt(50)
# Keep only the flagged rows
slow_responses = df.loc[mask]
slow_responses

Unnamed: 0,Date,Prompt,Response Time (ms),Tokens Generated
0,2023-01-01,Generate a creative story about space travel,62,287
3,2023-01-04,Explain quantum computing in simple terms,53,290
4,2023-01-05,Explain quantum computing in simple terms,75,392
5,2023-01-06,Generate marketing copy for a new tech product,61,250
6,2023-01-07,Write a sci-fi short story set in 2050,87,370
...,...,...,...,...
93,2023-04-04,Explain quantum computing in simple terms,97,325
95,2023-04-06,Explain quantum computing in simple terms,61,304
96,2023-04-07,Write a sci-fi short story set in 2050,57,238
98,2023-04-09,Generate marketing copy for a new tech product,77,296


In [11]:
# Write the filtered rows to disk; index=False leaves the row index out of the file
slow_responses.to_csv(path_or_buf='slow_responses.csv', index=False)
print("Filtered data saved to 'slow_responses.csv'!")

Filtered data saved to 'slow_responses.csv'!


##  Exporting Data to Different Formats: Excel, JSON, SQL, YAML

In [12]:
# pandas was already imported at the top of the notebook; repeating the import is harmless
import pandas as pd
# Reload the raw CSV without parse_dates, so 'Date' is read back as plain strings
df = pd.read_csv('model_logs.csv')
# Display the DataFrame as the cell's output
df

Unnamed: 0,Date,Prompt,Response Time (ms),Tokens Generated
0,2023-01-01,Generate a creative story about space travel,62,287
1,2023-01-02,Write a sci-fi short story set in 2050,45,361
2,2023-01-03,Write a poem about the future of technology,33,221
3,2023-01-04,Explain quantum computing in simple terms,53,290
4,2023-01-05,Explain quantum computing in simple terms,75,392
...,...,...,...,...
95,2023-04-06,Explain quantum computing in simple terms,61,304
96,2023-04-07,Write a sci-fi short story set in 2050,57,238
97,2023-04-08,Write a poem about the future of technology,38,465
98,2023-04-09,Generate marketing copy for a new tech product,77,296


### Exporting to Excel

In [None]:
pip install openpyxl -q

In [25]:
# Write `df` to the workbook 'data.xlsx'; index=False leaves the row index out of the sheet
df.to_excel(excel_writer='data.xlsx', index=False)
print("Data exported to 'data.xlsx'")

Data exported to 'data.xlsx'


###  Exporting to JSON

In [23]:
# Serialize `df` to 'data.json', keyed by column: {column -> {row index -> value}}
df.to_json(path_or_buf='data.json', orient='columns')

In [24]:
# Serialize `df` to 'chat_data.jsonl' in JSON Lines form:
# one JSON object per row, one row per line
df.to_json(path_or_buf='chat_data.jsonl', orient='records', lines=True)

### Exporting to SQL

In [22]:
import sqlite3

# Open (or create) the SQLite database file 'chat_data.db'
conn = sqlite3.connect('chat_data.db')
try:
    # Write `df` to a table named 'chat_data', replacing the table if it exists.
    # The table is deliberately NOT named after the file ('chat_data.db'):
    # an unquoted `chat_data.db` in SQL is read as "table db in schema chat_data",
    # so a dotted table name would have to be quoted in every query.
    df.to_sql('chat_data', conn, if_exists='replace', index=False)
finally:
    # Close the connection even if the write fails
    conn.close()

### Exporting to YAML

In [21]:
import yaml
# Turn each row of `df` into a {column name: value} dictionary and collect them in a list
data_dict = df.to_dict('records')

In [19]:
# Display the list of row dictionaries.
# NOTE(review): this echoes every row; slice it (e.g. data_dict[:3]) for a shorter preview
data_dict

[{'Date': '2023-01-01',
  'Prompt': 'Generate a creative story about space travel',
  'Response Time (ms)': 62,
  'Tokens Generated': 287},
 {'Date': '2023-01-02',
  'Prompt': 'Write a sci-fi short story set in 2050',
  'Response Time (ms)': 45,
  'Tokens Generated': 361},
 {'Date': '2023-01-03',
  'Prompt': 'Write a poem about the future of technology',
  'Response Time (ms)': 33,
  'Tokens Generated': 221},
 {'Date': '2023-01-04',
  'Prompt': 'Explain quantum computing in simple terms',
  'Response Time (ms)': 53,
  'Tokens Generated': 290},
 {'Date': '2023-01-05',
  'Prompt': 'Explain quantum computing in simple terms',
  'Response Time (ms)': 75,
  'Tokens Generated': 392},
 {'Date': '2023-01-06',
  'Prompt': 'Generate marketing copy for a new tech product',
  'Response Time (ms)': 61,
  'Tokens Generated': 250},
 {'Date': '2023-01-07',
  'Prompt': 'Write a sci-fi short story set in 2050',
  'Response Time (ms)': 87,
  'Tokens Generated': 370},
 {'Date': '2023-01-08',
  'Prompt': '

In [20]:
# Serialize the row dictionaries to 'chat_data.yaml'; mode 'w' overwrites an existing file
with open('chat_data.yaml', 'w') as yaml_file:
    yaml.dump(data_dict, stream=yaml_file)
    print('Data exported to chat_data.yaml!')

Data exported to chat_data.yaml!
