In [22]:
import os
import requests
import pandas as pd
import json
import random

# Ensure the data directory exists
data_dir = './data'
os.makedirs(data_dir, exist_ok=True)

# Fetch JSON data from JSONPlaceholder
users = []
url = "https://jsonplaceholder.typicode.com/users"
# requests applies no timeout by default, so an unresponsive server would
# block this cell forever; bound the wait explicitly.
response = requests.get(url, timeout=10)
# Fail here with a clear HTTPError on a 4xx/5xx reply instead of a confusing
# JSON-decoding or KeyError failure further down.
response.raise_for_status()
data = response.json()
# Every pass reuses the same fetched users (same name and city) and only the
# age is redrawn, so the same names appear once per pass in the result.
# NOTE: no random seed is set here, so ages differ between runs even though
# the train/validation split below is seeded.
for _ in range(10):  # Generate 10 sets of mock users
    for user in data:
        user_info = {
            "name": user['name'],
            "age": random.randint(18, 60),  # Random age, both bounds inclusive
            "city": user['address']['city']
        }
        users.append(user_info)

# Convert to DataFrame
df = pd.DataFrame(users)
print("DataFrame created:\n", df)

# Split data into train and validation sets: a seeded 80% sample for
# training, and every row not selected for training becomes validation.
train_df = df.sample(frac=0.8, random_state=42)
valid_df = df.drop(train_df.index)

# Turn each DataFrame row into one "prompt -> JSON" training line
def convert_to_json_format(df):
    """Build one text line per row of *df*.

    Each line is a natural-language request describing the row, followed by
    " -> " and the JSON object holding the same three fields.

    Args:
        df: DataFrame providing the columns 'name', 'age' and 'city'.

    Returns:
        A list of strings, one per row, in the row order of *df*.
    """
    def _format_example(record):
        # Read each field once and reuse it for both halves of the line.
        name, age, city = record['name'], record['age'], record['city']
        prompt = f"Convert this to JSON: name is {name}, age is {age}, and city is {city}."
        target = json.dumps({"name": name, "age": age, "city": city})
        return f"{prompt} -> {target}"

    return [_format_example(record) for _, record in df.iterrows()]

# Render both splits as text lines, training split first
train_lines, valid_lines = (
    convert_to_json_format(split) for split in (train_df, valid_df)
)

# Write data to files
def write_lines_to_file(file_path, lines, encoding="utf-8"):
    """Write every item of *lines* to *file_path*, one item per line.

    The file is created or truncated. A newline is appended after each item,
    so an empty *lines* produces an empty file.

    Args:
        file_path: Destination path; its directory must already exist.
        lines: Iterable of items to write; each is formatted with an f-string.
        encoding: Text encoding for the file. Set explicitly so the output
            does not depend on the platform's locale encoding, which can fail
            or mis-encode non-ASCII text.
    """
    with open(file_path, 'w', encoding=encoding) as file:
        # One batched call instead of a separate write() per line.
        file.writelines(f"{line}\n" for line in lines)

# Save each split under the data directory, training split first
for _file_name, _split_lines in (('train.txt', train_lines), ('valid.txt', valid_lines)):
    write_lines_to_file(os.path.join(data_dir, _file_name), _split_lines)

# Report completion (the files are not re-read or checked here)
print("Data written to train.txt and valid.txt successfully")


DataFrame created:
                         name  age            city
0              Leanne Graham   47     Gwenborough
1               Ervin Howell   36     Wisokyburgh
2           Clementine Bauch   31   McKenziehaven
3           Patricia Lebsack   19     South Elvis
4           Chelsey Dietrich   54      Roscoeview
..                       ...  ...             ...
95      Mrs. Dennis Schulist   21   South Christy
96           Kurtis Weissnat   26       Howemouth
97  Nicholas Runolfsdottir V   39       Aliyaview
98           Glenna Reichert   48  Bartholomebury
99        Clementina DuBuque   27     Lebsackbury

[100 rows x 3 columns]
Data written to train.txt and valid.txt successfully
