This is daily aggregation for pd3

In [1]:
%reload_ext sql
%config SqlMagic.autocommit=False
%config SqlMagic.autolimit=0
%config SqlMagic.autopandas=True
%config SqlMagic.displaylimit=200

In [2]:
%sql trino://localhost:9090/cuebiq/

'Connected: @cuebiq/'

In [3]:
pip install geohash2

Note: you may need to restart the kernel to use updated packages.


In [4]:
import gc
import os
import time
import logging
import geohash2
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime, timedelta
from trino.dbapi import connect

In [5]:
# SQL engine class
class TrinoEngine():
    def __init__(self):
        conn = connect(
            host="localhost",
            port=9090,
            catalog="cuebiq"
        )
        self.cur = conn.cursor()
        self.engine = create_engine("trino://localhost:9090/cuebiq/")
    
    def execute_statement(self, query:str) -> list:
        """
        Create and drop statements.
        """
        self.cur.execute(query)
        return self.cur.fetchall()
    
    def read_sql(self, query:str) -> pd.DataFrame: 
        """
        Select and insert into operations.
        """
        return pd.read_sql(query, self.engine)

    def read_sql_chunked(self, query: str, chunksize: int = 10000):
        return pd.read_sql(query, self.engine, chunksize=chunksize)

sql_engine = TrinoEngine()

In [6]:
# Function to insert data with retry mechanism
def insert_data_with_retry(df, table_name, con, retries=3, delay=5):
    for attempt in range(retries):
        try:
            df.to_sql(
                table_name, 
                con, 
                index=False, 
                if_exists="append", 
                method="multi"
            )
            logging.info(f"Inserted data into table {table_name}")
            return True
        except Exception as e:
            logging.error(f"Attempt {attempt+1} failed with error: {e}")
            if attempt < retries - 1:
                time.sleep(delay)
            else:
                logging.error(f"Failed to insert data into table {table_name} after {retries} attempts")
                return False

In [13]:
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Database connection setup
output_schema_name = 'pop_density'
con = create_engine(f"trino://localhost:9090/dedicated/{output_schema_name}")

# Define the input parameters
country_code = 'IN'
start_date = 20191115
end_date = 20191115

# Define the input schema and table name
schema_name = {'cda': 'cuebiq.paas_cda_pe_v3'}
pe_dl_table = f"{schema_name['cda']}.device_location_uplevelled"

# Convert integer dates to datetime objects
start_date_dt = datetime.strptime(str(start_date), '%Y%m%d')
end_date_dt = datetime.strptime(str(end_date), '%Y%m%d')

# Define the sorted longitude boundaries
sorted_lons = [67.5, 68.90625, 70.3125, 71.71875, 73.125, 
               74.53125, 75.9375, 77.34375, 78.75, 80.15625, 
               81.5625, 82.96875, 84.375, 85.78125, 87.1875, 
               88.59375]

# Initialize a list to store failed insertions
failed_inserts = []
# Define the buffer value
buffer_value = 20

In [28]:
# Loop through each day from start_date to end_date
current_date = start_date_dt
while current_date <= end_date_dt:
    try:
        # Calculate the lookback and lookahead dates
        lookback_date = current_date - timedelta(days=1)
        lookahead_date = current_date + timedelta(days=35)
        
        # Format dates for the SQL query
        formatted_lookback_date = lookback_date.strftime('%Y%m%d')
        formatted_current_date = current_date.strftime('%Y%m%d')
        formatted_lookahead_date = lookahead_date.strftime('%Y%m%d')
        
        # Loop through each longitude range
        for i in range(len(sorted_lons) + 1):
            if i == 0:
                min_lon = sorted_lons[i] - buffer_value
                max_lon = sorted_lons[i]
            elif i == len(sorted_lons):
                min_lon = sorted_lons[i - 1]
                max_lon = sorted_lons[i - 1] + buffer_value
            else:
                min_lon = sorted_lons[i - 1]
                max_lon = sorted_lons[i]
            
            # Construct the SQL query
            query = f"""
            SELECT 
                cuebiq_id, 
                event_zoned_datetime, 
                processing_date,
                lat,
                lng
            FROM {pe_dl_table}
            WHERE 
                processing_date BETWEEN {formatted_lookback_date} AND {formatted_lookahead_date}
                AND country_code = '{country_code}' 
                AND event_zoned_datetime IS NOT NULL
                AND TRY(date_parse(substr(event_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s')) IS NOT NULL
                AND DATE(TRY(date_parse(substr(event_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s'))) = date_parse('{formatted_current_date}', '%Y%m%d')
                AND lng BETWEEN {min_lon} AND {max_lon}
            """
            
            logging.info(f"Executing SQL query for date {formatted_current_date} and longitude range {min_lon} to {max_lon}")
            
            try:
                pe_dl_table_gen = sql_engine.read_sql_chunked(query)
                
                # Convert the generator to a DataFrame
                chunks = [chunk for chunk in pe_dl_table_gen]
                if chunks:
                    pe_dl_table_df = pd.concat(chunks, ignore_index=True)
                    
                    # Calculate geohashes
                    pe_dl_table_df['geohash3'] = pe_dl_table_df.apply(lambda row: geohash2.encode(row['lat'], row['lng'], precision=3), axis=1)
                    
                    # Aggregate data for geohash3
                    aggregated_data_3 = pe_dl_table_df.groupby('geohash3').agg(
                        no_of_points=('geohash3', 'size'),
                        no_of_unique_users=('cuebiq_id', 'nunique')
                    ).reset_index()
                    
                    # Filter rows with no_of_unique_users > 10
                    filtered_data_3 = aggregated_data_3[aggregated_data_3['no_of_unique_users'] > 10].copy()
                    
                    # Add the local_date column
                    filtered_data_3.loc[:, 'local_date'] = formatted_current_date
                    
                    # Insert filtered aggregated data for geohash3 into SQL table
                    if not filtered_data_3.empty:
                        table_name_agg3 = f"pd_{country_code.lower()}_{formatted_current_date}_agg3_2"
                        if not insert_data_with_retry(filtered_data_3, table_name_agg3, con):
                            failed_inserts.append((formatted_current_date, min_lon, max_lon))
            
            except Exception as e:
                logging.error(f"Error while processing data for date {formatted_current_date} and longitude range {min_lon} to {max_lon}: {e}")
                failed_inserts.append((formatted_current_date, min_lon, max_lon))
    
    except Exception as e:
        logging.error(f"Error while processing data for date {formatted_current_date}: {e}")
        failed_inserts.append((formatted_current_date, None, None))
    
    # Move to the next day
    current_date += timedelta(days=1)

# Print the failed inserts
if failed_inserts:
    print("Failed inserts:")
    for failed_insert in failed_inserts:
        print(failed_insert)

logging.info("Data extraction, aggregation, and saving completed.")

2024-07-02 16:57:27,171 - INFO - Executing SQL query for date 20191115 and longitude range 47.5 to 67.5
2024-07-02 16:57:38,700 - ERROR - Error while processing data for date 20191115 and longitude range 47.5 to 67.5: cannot set a frame with no defined index and a scalar
2024-07-02 16:57:38,701 - INFO - Executing SQL query for date 20191115 and longitude range 67.5 to 68.90625


KeyboardInterrupt: 

In [9]:
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize the SQL engine
sql_engine = TrinoEngine()

# Database connection setup
output_schema_name = 'pop_density'
con = create_engine(f"trino://localhost:9090/dedicated/{output_schema_name}")

# Define the buffer value
buffer_value = 20
failed_inserts = []

In [10]:
# Function to process data for a given date and longitude range
def process_data_for_date_and_lon(formatted_current_date, min_lon, max_lon):
    try:
        lookback_date = datetime.strptime(formatted_current_date, '%Y%m%d') - timedelta(days=1)
        lookahead_date = datetime.strptime(formatted_current_date, '%Y%m%d') + timedelta(days=35)
        
        formatted_lookback_date = lookback_date.strftime('%Y%m%d')
        formatted_lookahead_date = lookahead_date.strftime('%Y%m%d')
        
        query = f"""
        SELECT 
            cuebiq_id, 
            event_zoned_datetime, 
            processing_date,
            lat,
            lng
        FROM cuebiq.paas_cda_pe_v3.device_location_uplevelled
        WHERE 
            processing_date BETWEEN {formatted_lookback_date} AND {formatted_lookahead_date}
            AND country_code = '{country_code}' 
            AND event_zoned_datetime IS NOT NULL
            AND TRY(date_parse(substr(event_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s')) IS NOT NULL
            AND DATE(TRY(date_parse(substr(event_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s'))) = date_parse('{formatted_current_date}', '%Y%m%d')
            AND lng BETWEEN {min_lon} AND {max_lon}
        """
        
        logging.info(f"Executing SQL query for date {formatted_current_date} and longitude range {min_lon} to {max_lon}")
        
        try:
            pe_dl_table_gen = sql_engine.read_sql_chunked(query)
            
            chunks = [chunk for chunk in pe_dl_table_gen]
            if chunks:
                pe_dl_table_df = pd.concat(chunks, ignore_index=True)
                
                pe_dl_table_df['geohash3'] = pe_dl_table_df.apply(lambda row: geohash2.encode(row['lat'], row['lng'], precision=3), axis=1)
                
                aggregated_data_3 = pe_dl_table_df.groupby('geohash3').agg(
                    no_of_points=('geohash3', 'size'),
                    no_of_unique_users=('cuebiq_id', 'nunique')
                ).reset_index()
                
                filtered_data_3 = aggregated_data_3[aggregated_data_3['no_of_unique_users'] > 10].copy()
                
                filtered_data_3.loc[:, 'local_date'] = formatted_current_date
                
                if not filtered_data_3.empty:
                    table_name_agg3 = f"pd_{country_code.lower()}_{formatted_current_date}_agg3"
                    if not insert_data_with_retry(filtered_data_3, table_name_agg3, con):
                        failed_inserts.append((formatted_current_date, min_lon, max_lon))
        
        except Exception as e:
            logging.error(f"Error while processing data for date {formatted_current_date} and longitude range {min_lon} to {max_lon}: {e}")
            failed_inserts.append((formatted_current_date, min_lon, max_lon))
    
    except Exception as e:
        logging.error(f"Error while processing data for date {formatted_current_date}: {e}")
        failed_inserts.append((formatted_current_date, None, None))


In [12]:
# Define the input parameters
country_code = 'IN'
missing_sections = [
    ('20190603', 73.125, 74.53125),
    ('20190620', 87.1875, 88.59375),
    ('20190620', 88.59375, 108.59375),
    ('20190715', 71.71875, 73.125),
    ('20190721', 88.59375, 108.59375),
    ('20190723', 75.9375, 77.34375),
    ('20190723', 77.34375, 78.75),
    ('20190724', 80.15625, 81.5625),
    ('20190724', 81.5625, 82.96875),
    ('20190731', 67.5, 68.90625),
    ('20190801', 73.125, 74.53125),
    ('20190801', 74.53125, 75.9375),
    ('20190804', 82.96875, 84.375),
    ('20190804', 84.375, 85.78125),
    ('20190808', 78.75, 80.15625),
    ('20190808', 80.15625, 81.5625),
    ('20190808', 81.5625, 82.96875),
    ('20190812', 71.71875, 73.125),
    ('20190815', 77.34375, 78.75),
    ('20190815', 78.75, 80.15625),
    ('20190820', 67.5, 68.90625),
    ('20190821', 75.9375, 77.34375),
    ('20190824', 75.9375, 77.34375),
    ('20190824', 77.34375, 78.75)
]

# Loop through each missing section
for section in missing_sections:
    formatted_current_date, min_lon, max_lon = section
    process_data_for_date_and_lon(formatted_current_date, min_lon, max_lon)

# Print the failed inserts
if failed_inserts:
    print("Failed inserts:")
    for failed_insert in failed_inserts:
        print(failed_insert)

logging.info("Data extraction, aggregation, and saving completed for missing sections.")

2024-06-13 22:47:20,183 - INFO - Executing SQL query for date 20190603 and longitude range 73.125 to 74.53125
2024-06-13 22:48:28,948 - INFO - Inserted data into table pd_in_20190603_agg3
2024-06-13 22:48:29,020 - INFO - Executing SQL query for date 20190620 and longitude range 87.1875 to 88.59375
2024-06-13 22:49:25,742 - INFO - Inserted data into table pd_in_20190620_agg3
2024-06-13 22:49:25,797 - INFO - Executing SQL query for date 20190620 and longitude range 88.59375 to 108.59375
2024-06-13 22:50:08,892 - INFO - Inserted data into table pd_in_20190620_agg3
2024-06-13 22:50:08,928 - INFO - Executing SQL query for date 20190715 and longitude range 71.71875 to 73.125
2024-06-13 22:51:37,198 - INFO - Inserted data into table pd_in_20190715_agg3
2024-06-13 22:51:37,304 - INFO - Executing SQL query for date 20190721 and longitude range 88.59375 to 108.59375
2024-06-13 22:52:19,114 - INFO - Inserted data into table pd_in_20190721_agg3
2024-06-13 22:52:19,145 - INFO - Executing SQL query 

In [15]:
# Define the specific dates to run the script
# specific_dates = ['20190522', '20191226']
specific_dates = ['20191126']

# Initialize logging
logging.basicConfig(level=logging.INFO)

# Initialize a list to store failed inserts
failed_inserts = []

# Loop through each specific date
for date_str in specific_dates:
    try:
        # Parse the current date
        current_date = datetime.strptime(date_str, '%Y%m%d')
        
        # Calculate the lookback and lookahead dates
        lookback_date = current_date - timedelta(days=1)
        lookahead_date = current_date + timedelta(days=35)
        
        # Format dates for the SQL query
        formatted_lookback_date = lookback_date.strftime('%Y%m%d')
        formatted_current_date = current_date.strftime('%Y%m%d')
        formatted_lookahead_date = lookahead_date.strftime('%Y%m%d')
        
        # Loop through each longitude range
        for i in range(len(sorted_lons) + 1):
            if i == 0:
                min_lon = sorted_lons[i] - buffer_value
                max_lon = sorted_lons[i]
            elif i == len(sorted_lons):
                min_lon = sorted_lons[i - 1]
                max_lon = sorted_lons[i - 1] + buffer_value
            else:
                min_lon = sorted_lons[i - 1]
                max_lon = sorted_lons[i]
            
            # Construct the SQL query
            query = f"""
            SELECT 
                cuebiq_id, 
                event_zoned_datetime, 
                processing_date,
                lat,
                lng
            FROM {pe_dl_table}
            WHERE 
                processing_date BETWEEN {formatted_lookback_date} AND {formatted_lookahead_date}
                AND country_code = '{country_code}' 
                AND event_zoned_datetime IS NOT NULL
                AND TRY(date_parse(substr(event_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s')) IS NOT NULL
                AND DATE(TRY(date_parse(substr(event_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s'))) = date_parse('{formatted_current_date}', '%Y%m%d')
                AND lng BETWEEN {min_lon} AND {max_lon}
            """
            
            logging.info(f"Executing SQL query for date {formatted_current_date} and longitude range {min_lon} to {max_lon}")
            
            try:
                pe_dl_table_gen = sql_engine.read_sql_chunked(query)
                
                # Convert the generator to a DataFrame
                chunks = [chunk for chunk in pe_dl_table_gen]
                if chunks:
                    pe_dl_table_df = pd.concat(chunks, ignore_index=True)
                    
                    # Calculate geohashes
                    pe_dl_table_df['geohash3'] = pe_dl_table_df.apply(lambda row: geohash2.encode(row['lat'], row['lng'], precision=3), axis=1)
                    
                    # Aggregate data for geohash3
                    aggregated_data_3 = pe_dl_table_df.groupby('geohash3').agg(
                        no_of_points=('geohash3', 'size'),
                        no_of_unique_users=('cuebiq_id', 'nunique')
                    ).reset_index()
                    
                    # Filter rows with no_of_unique_users > 10
                    filtered_data_3 = aggregated_data_3[aggregated_data_3['no_of_unique_users'] > 10].copy()
                    
                    # Add the local_date column
                    filtered_data_3.loc[:, 'local_date'] = formatted_current_date
                    
                    # Save filtered aggregated data for geohash3 to CSV file
                    if not filtered_data_3.empty:
                        csv_filename = f"/home/jovyan/Data/Fixing/pd_{country_code.lower()}_fix_agg3_2.csv"
                        filtered_data_3.to_csv(csv_filename, mode='a', index=False, header=not pd.io.common.file_exists(csv_filename))
            
            except Exception as e:
                logging.error(f"Error while processing data for date {formatted_current_date} and longitude range {min_lon} to {max_lon}: {e}")
                failed_inserts.append((formatted_current_date, min_lon, max_lon))
    
    except Exception as e:
        logging.error(f"Error while processing data for date {formatted_current_date}: {e}")
        failed_inserts.append((formatted_current_date, None, None))

# Print the failed inserts
if failed_inserts:
    print("Failed inserts:")
    for failed_insert in failed_inserts:
        print(failed_insert)

logging.info("Data extraction, aggregation, and saving completed.")


2024-07-03 04:22:29,932 - INFO - Executing SQL query for date 20191126 and longitude range 47.5 to 67.5
2024-07-03 04:22:36,282 - ERROR - Error while processing data for date 20191126 and longitude range 47.5 to 67.5: cannot set a frame with no defined index and a scalar
2024-07-03 04:22:36,283 - INFO - Executing SQL query for date 20191126 and longitude range 67.5 to 68.90625
2024-07-03 04:22:50,493 - INFO - Executing SQL query for date 20191126 and longitude range 68.90625 to 70.3125
2024-07-03 04:23:29,484 - INFO - Executing SQL query for date 20191126 and longitude range 70.3125 to 71.71875
2024-07-03 04:23:47,342 - INFO - Executing SQL query for date 20191126 and longitude range 71.71875 to 73.125
2024-07-03 04:24:36,201 - INFO - Executing SQL query for date 20191126 and longitude range 73.125 to 74.53125
2024-07-03 04:25:16,751 - INFO - Executing SQL query for date 20191126 and longitude range 74.53125 to 75.9375
2024-07-03 04:26:26,637 - INFO - Executing SQL query for date 20191

Failed inserts:
('20191126', 47.5, 67.5)


In [None]:
# Define the specific dates to run the script
specific_dates = ['20191224', '20191226', '20190525', '20191014', '20190522']

# Initialize logging
logging.basicConfig(level=logging.INFO)

# Initialize a list to store failed inserts
failed_inserts = []

# Loop through each specific date
for date_str in specific_dates:
    try:
        # Parse the current date
        current_date = datetime.strptime(date_str, '%Y%m%d')
        
        # Calculate the lookback and lookahead dates
        lookback_date = current_date - timedelta(days=1)
        lookahead_date = current_date + timedelta(days=35)
        
        # Format dates for the SQL query
        formatted_lookback_date = lookback_date.strftime('%Y%m%d')
        formatted_current_date = current_date.strftime('%Y%m%d')
        formatted_lookahead_date = lookahead_date.strftime('%Y%m%d')
        
        # Loop through each longitude range
        for i in range(len(sorted_lons) + 1):
            if i == 0:
                min_lon = sorted_lons[i] - buffer_value
                max_lon = sorted_lons[i]
            elif i == len(sorted_lons):
                min_lon = sorted_lons[i - 1]
                max_lon = sorted_lons[i - 1] + buffer_value
            else:
                min_lon = sorted_lons[i - 1]
                max_lon = sorted_lons[i]
            
            # Construct the SQL query
            query = f"""
            SELECT 
                cuebiq_id, 
                event_zoned_datetime, 
                processing_date,
                lat,
                lng
            FROM {pe_dl_table}
            WHERE 
                processing_date BETWEEN {formatted_lookback_date} AND {formatted_lookahead_date}
                AND country_code = '{country_code}' 
                AND event_zoned_datetime IS NOT NULL
                AND TRY(date_parse(substr(event_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s')) IS NOT NULL
                AND DATE(TRY(date_parse(substr(event_zoned_datetime, 1, 19), '%Y-%m-%dT%H:%i:%s'))) = date_parse('{formatted_current_date}', '%Y%m%d')
                AND lng BETWEEN {min_lon} AND {max_lon}
            """
            
            logging.info(f"Executing SQL query for date {formatted_current_date} and longitude range {min_lon} to {max_lon}")
            
            try:
                pe_dl_table_gen = sql_engine.read_sql_chunked(query)
                
                # Convert the generator to a DataFrame
                chunks = [chunk for chunk in pe_dl_table_gen]
                if chunks:
                    pe_dl_table_df = pd.concat(chunks, ignore_index=True)
                    
                    # Calculate geohashes
                    pe_dl_table_df['geohash3'] = pe_dl_table_df.apply(lambda row: geohash2.encode(row['lat'], row['lng'], precision=3), axis=1)
                    
                    # Aggregate data for geohash3
                    aggregated_data_3 = pe_dl_table_df.groupby('geohash3').agg(
                        no_of_points=('geohash3', 'size'),
                        no_of_unique_users=('cuebiq_id', 'nunique')
                    ).reset_index()
                    
                    # Filter rows with no_of_unique_users > 10
                    filtered_data_3 = aggregated_data_3[aggregated_data_3['no_of_unique_users'] > 10].copy()
                    
                    # Add the local_date column
                    filtered_data_3.loc[:, 'local_date'] = formatted_current_date
                    
                    # Save filtered aggregated data for geohash3 to CSV file
                    if not filtered_data_3.empty:
                        csv_filename = f"/home/jovyan/Data/Fixing/pd_{country_code.lower()}_fix_agg3_2.csv"
                        filtered_data_3.to_csv(csv_filename, mode='a', index=False, header=not pd.io.common.file_exists(csv_filename))
            
            except Exception as e:
                logging.error(f"Error while processing data for date {formatted_current_date} and longitude range {min_lon} to {max_lon}: {e}")
                failed_inserts.append((formatted_current_date, min_lon, max_lon))
    
    except Exception as e:
        logging.error(f"Error while processing data for date {formatted_current_date}: {e}")
        failed_inserts.append((formatted_current_date, None, None))

# Print the failed inserts
if failed_inserts:
    print("Failed inserts:")
    for failed_insert in failed_inserts:
        print(failed_insert)

logging.info("Data extraction, aggregation, and saving completed.")


2024-07-02 19:53:42,463 - INFO - Executing SQL query for date 20191224 and longitude range 47.5 to 67.5
2024-07-02 19:54:00,062 - ERROR - Error while processing data for date 20191224 and longitude range 47.5 to 67.5: cannot set a frame with no defined index and a scalar
2024-07-02 19:54:00,062 - INFO - Executing SQL query for date 20191224 and longitude range 67.5 to 68.90625
2024-07-02 19:55:01,796 - INFO - Executing SQL query for date 20191224 and longitude range 68.90625 to 70.3125
2024-07-02 19:55:58,104 - INFO - Executing SQL query for date 20191224 and longitude range 70.3125 to 71.71875
2024-07-02 19:57:31,571 - INFO - Executing SQL query for date 20191224 and longitude range 71.71875 to 73.125
2024-07-02 19:59:43,656 - INFO - Executing SQL query for date 20191224 and longitude range 73.125 to 74.53125
2024-07-02 20:01:48,195 - INFO - Executing SQL query for date 20191224 and longitude range 74.53125 to 75.9375
2024-07-02 20:03:24,111 - INFO - Executing SQL query for date 20191

In [36]:
# Construct the SQL query
query = f"""
SELECT *
FROM dedicated.pop_density.pd_in_20191126_agg3_2
"""


pe_dl_table_gen = sql_engine.read_sql(query)
pe_dl_table_gen

Unnamed: 0,geohash3,no_of_points,no_of_unique_users,local_date
0,tdk,188,13,20191126
1,tds,32656,796,20191126
2,tdu,75673,1574,20191126
3,teh,36127,844,20191126
4,tek,293023,6308,20191126
...,...,...,...,...
167,whe,5601,158,20191126
168,whf,11756,290,20191126
169,whg,32171,696,20191126
170,whu,6001,128,20191126


In [37]:
pe_dl_table_gen.to_csv('/home/jovyan/Data/Fixing/pd_in_20191126_agg3_2.csv', mode='a', header=False, index=False)
